Example No. 1
def _kl_expfamily_expfamily(p, q):
    """Compute kl-divergence using `Bregman divergences <https://www.lix.polytechnique.fr/~nielsen/EntropyEF-ICIP2010.pdf>`_
    """
    if type(p) is not type(q):
        raise NotImplementedError

    p_natural_params = []
    for param in p._natural_parameters:
        param = param.detach()
        param.stop_gradient = False
        p_natural_params.append(param)

    q_natural_params = q._natural_parameters

    p_log_norm = p._log_normalizer(*p_natural_params)

    try:
        if _non_static_mode():
            p_grads = paddle.grad(p_log_norm,
                                  p_natural_params,
                                  create_graph=True)
        else:
            p_grads = paddle.static.gradients(p_log_norm, p_natural_params)
    except RuntimeError as e:
        raise TypeError(
            "Cann't compute kl_divergence({cls_p}, {cls_q}) use bregman divergence. Please register_kl({cls_p}, {cls_q})."
            .format(cls_p=type(p).__name__, cls_q=type(q).__name__)) from e

    kl = q._log_normalizer(*q_natural_params) - p_log_norm
    for p_param, q_param, p_grad in zip(p_natural_params, q_natural_params,
                                        p_grads):
        term = (q_param - p_param) * p_grad
        kl -= _sum_rightmost(term, len(q.event_shape))

    return kl
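A minimal usage sketch of the public entry point that can dispatch to the routine above. Note the assumption: whether a given pair of distributions actually falls back to this Bregman-divergence path, rather than a specialized registered kernel, depends on which kl kernels your Paddle version registers.

# Hedged sketch: paddle.distribution.kl_divergence dispatches to a registered
# kernel and may fall back to the exponential-family Bregman formula above when
# both arguments share the same ExponentialFamily subclass.
import paddle
from paddle.distribution import Beta, kl_divergence

p = Beta(paddle.to_tensor(0.5), paddle.to_tensor(0.5))
q = Beta(paddle.to_tensor(0.3), paddle.to_tensor(0.7))
print(kl_divergence(p, q))  # a scalar Tensor holding KL(p || q)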
Example No. 2
def enabled():
    """
    This function checks whether the program runs in dynamic graph mode or not.
    You can enter dynamic graph mode with the :ref:`api_fluid_dygraph_guard` api,
    or enable and disable dynamic graph mode with the :ref:`api_fluid_dygraph_enable_dygraph`
    and :ref:`api_fluid_dygraph_disable_dygraph` apis.

    **Note**:
        ``fluid.dygraph.enabled`` is an alias of ``fluid.in_dygraph_mode``, and
        using ``fluid.in_dygraph_mode`` is recommended for now.

    Returns:
        bool: Whether the program is running in dynamic graph mode.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid

            fluid.enable_dygraph()  # Now we are in dygraph mode
            print(fluid.dygraph.enabled())  # True
            fluid.disable_dygraph()
            print(fluid.dygraph.enabled())  # False
    """
    # TODO(jiabin): Make this check as in_dygraph_mode when we support default eager mode.
    return framework._non_static_mode()
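As a side note, Paddle 2.x exposes the same check as ``paddle.in_dynamic_mode``; the short sketch below assumes a 2.x install and simply toggles the mode to show the flag changing.

# Sketch: toggling between dynamic and static graph mode and checking the flag.
import paddle

print(paddle.in_dynamic_mode())   # True, dynamic mode is the 2.x default
paddle.enable_static()
print(paddle.in_dynamic_mode())   # False
paddle.disable_static()           # back to dynamic mode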
Example No. 3
    def log_prob(self, value):
        """Log probability density/mass function.

        Args:
          value (Tensor): The input tensor.

        Returns:
          Tensor: log probability. The data type is the same as ``value``.

        """
        value = self._check_values_dtype_in_probs(self.low, value)
        if _non_static_mode():
            # ensure value in [low, high]
            lb_bool = self.low < value
            ub_bool = value < self.high

            lb = _C_ops.cast(lb_bool, 'in_dtype', lb_bool.dtype, 'out_dtype',
                             value.dtype)
            ub = _C_ops.cast(ub_bool, 'in_dtype', ub_bool.dtype, 'out_dtype',
                             value.dtype)
            return nn.log(lb * ub) - nn.log(self.high - self.low)

        name = self.name + '_log_prob'
        lb_bool = self.low < value
        ub_bool = value < self.high
        lb = tensor.cast(lb_bool, dtype=value.dtype)
        ub = tensor.cast(ub_bool, dtype=value.dtype)
        return elementwise_sub(nn.log(lb * ub),
                               nn.log(self.high - self.low),
                               name=name)
Example No. 4
def _ndarray_to_tensor(obj, return_numpy):
    if return_numpy:
        return obj
    if _non_static_mode():
        return paddle.to_tensor(obj)
    else:
        return _to_LodTensor(obj)
Example No. 5
def _parse_load_result(obj, return_numpy):
    def is_layer(obj):
        return isinstance(obj, fluid.Layer)

    def parse_layer(obj):
        temp_dict = _parse_load_result(obj.__dict__, False)
        obj.__dict__.update(temp_dict)
        return obj

    if _contain_x(obj, is_layer):
        if not _non_static_mode():
            raise ValueError(
                "Layer can only be loaded in dynamic graph mode, but now in static graph mode."
            )

        _parse_every_object(obj, is_layer, parse_layer)

    def tuple_to_tensor(obj):
        return _tuple_to_tensor(obj, return_numpy=return_numpy)

    def ndarray_to_tensor(obj):
        return _ndarray_to_tensor(obj, return_numpy=return_numpy)

    # tuple(name, ndarray) was converted from a varbase of paddle 2.1,
    # and all tuple(name, ndarray) entries are converted to tensors.
    if _contain_x(obj, _transformed_from_varbase):
        return _parse_every_object(obj, _transformed_from_varbase,
                                   tuple_to_tensor)
    # If there is no tuple(name, ndarray), the object is considered to be saved by paddle 2.0
    # or converted from a LoDTensor, and all ndarrays are converted to tensors.
    else:
        return _parse_every_object(obj, _transformed_from_lodtensor,
                                   ndarray_to_tensor)
Example No. 6
    def forward(self, input, label, length=None):
        if _non_static_mode():
            _, _, _, log_likelihood = _C_ops.linear_chain_crf(
                input, self._transition, label, length, "is_test",
                self._is_test)
            return log_likelihood

        alpha = self._helper.create_variable_for_type_inference(
            dtype=self._dtype)
        emission_exps = self._helper.create_variable_for_type_inference(
            dtype=self._dtype)
        transition_exps = self._helper.create_variable_for_type_inference(
            dtype=self._dtype)
        log_likelihood = self._helper.create_variable_for_type_inference(
            dtype=self._dtype)
        this_inputs = {
            "Emission": [input],
            "Transition": self._transition,
            "Label": [label]
        }
        if length is not None:
            this_inputs['Length'] = [length]
        self._helper.append_op(
            type='linear_chain_crf',
            inputs=this_inputs,
            outputs={
                "Alpha": [alpha],
                "EmissionExps": [emission_exps],
                "TransitionExps": transition_exps,
                "LogLikelihood": log_likelihood
            },
            attrs={"is_test": self._is_test, })
        return log_likelihood
Example No. 7
    def __init__(self, logits, name=None):
        """
        Args:
            logits(list|tuple|numpy.ndarray|Tensor): The logits input of categorical distribution. The data type is float32 or float64.
            name(str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
        """
        if not _non_static_mode():
            check_type(logits, 'logits',
                       (np.ndarray, tensor.Variable, list, tuple),
                       'Categorical')

        self.name = name if name is not None else 'Categorical'
        self.dtype = 'float32'

        if self._validate_args(logits):
            self.logits = logits
            self.dtype = convert_dtype(logits.dtype)
        else:
            if isinstance(logits, np.ndarray) and str(
                    logits.dtype) in ['float32', 'float64']:
                self.dtype = logits.dtype
            self.logits = self._to_tensor(logits)[0]
            if self.dtype != convert_dtype(self.logits.dtype):
                self.logits = tensor.cast(self.logits, dtype=self.dtype)
        dist_sum = paddle.sum(self.logits, axis=-1, keepdim=True)
        self._prob = self.logits / dist_sum
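A small construction sketch; note that the logits are normalized by their sum into ``self._prob`` above, i.e. they are treated as unnormalized probabilities rather than log-probabilities.

# Sketch: building a Categorical from a 1-D tensor of non-negative scores.
import paddle
from paddle.distribution import Categorical

paddle.seed(100)
scores = paddle.rand([6])
cat = Categorical(scores)
print(cat.sample([3]))  # three category indices drawn from the distribution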
Example No. 8
def prepare_context(strategy=None):
    '''
    :api_attr: imperative
    '''
    if strategy is None:
        strategy = ParallelStrategy()
        strategy.nranks = Env().nranks
        strategy.local_rank = Env().local_rank
        strategy.trainer_endpoints = Env().trainer_endpoints
        strategy.current_endpoint = Env().current_endpoint
    if strategy.nranks < 2:
        return
    assert framework._non_static_mode() is True, \
        "dygraph.prepare_context should be used with dygraph mode."
    place = framework._current_expected_place()
    assert place is not None, \
        "dygraph.prepare_context should be used in fluid.dygraph.guard(place) guard."
    if not parallel_helper._is_parallel_ctx_initialized():
        if isinstance(place, core.CUDAPlace):
            parallel_helper._set_parallel_ctx(
                core.NCCLParallelContext(strategy, place))
        elif isinstance(place, core.XPUPlace):
            parallel_helper._set_parallel_ctx(
                core.BKCLParallelContext(strategy, place))
        elif isinstance(place, core.NPUPlace):
            parallel_helper._set_parallel_ctx(
                core.HCCLParallelContext(strategy, place))
        else:
            # TODO(Yancey1989): add Gloo Parallel Context to support CPU parallel computation
            assert ("Only support CUDAPlace or XPUPlace or NPUPlace for now.")
        parallel_helper._init_parallel_ctx()
    return strategy
Example No. 9
    def entropy(self):
        """caculate entropy use `bregman divergence` 
        https://www.lix.polytechnique.fr/~nielsen/EntropyEF-ICIP2010.pdf
        """
        entropy_value = -self._mean_carrier_measure

        natural_parameters = []
        for parameter in self._natural_parameters:
            parameter = parameter.detach()
            parameter.stop_gradient = False
            natural_parameters.append(parameter)

        log_norm = self._log_normalizer(*natural_parameters)

        if _non_static_mode():
            grads = paddle.grad(log_norm.sum(),
                                natural_parameters,
                                create_graph=True)
        else:
            grads = paddle.static.gradients(log_norm.sum(), natural_parameters)

        entropy_value += log_norm
        for p, g in zip(natural_parameters, grads):
            entropy_value -= p * g

        return entropy_value
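A usage sketch with an exponential-family distribution. The assumption here is that ``paddle.distribution.Dirichlet`` inherits this ``entropy`` implementation from ``ExponentialFamily`` in your Paddle version rather than overriding it.

# Sketch: entropy of a Dirichlet distribution, expected to use the routine above.
import paddle
from paddle.distribution import Dirichlet

d = Dirichlet(paddle.to_tensor([1.0, 2.0, 3.0]))
print(d.entropy())  # a Tensor holding the differential entropy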
Example No. 10
    def _check_values_dtype_in_probs(self, param, value):
        """
        The ``log_prob`` and ``probs`` methods take an input ``value``; if its dtype differs
        from that of ``param``, ``value`` is cast to ``param``'s dtype.

        Args:
            param (Tensor): low and high in Uniform class, loc and scale in Normal class.
            value (Tensor): The input tensor.

        Returns:
            value (Tensor): ``value`` with its dtype converted to match ``param`` if they differ.
        """
        if _non_static_mode():
            if value.dtype != param.dtype and convert_dtype(
                    value.dtype) in ['float32', 'float64']:
                warnings.warn(
                    "dtype of input 'value' needs to be the same as parameters of distribution class. dtype of 'value' will be converted."
                )
                return _C_ops.cast(value, 'in_dtype', value.dtype, 'out_dtype',
                                   param.dtype)
            return value

        check_variable_and_dtype(value, 'value', ['float32', 'float64'],
                                 'log_prob')
        if value.dtype != param.dtype:
            warnings.warn(
                "dtype of input 'value' needs to be the same as parameters of distribution class. dtype of 'value' will be converted."
            )
            return tensor.cast(value, dtype=param.dtype)
        return value
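A short sketch of the behaviour this helper gives the public distributions: passing a ``value`` whose dtype differs from the parameters triggers the warning above and a cast to the parameters' dtype.

# Sketch: a float64 value is cast to the distribution's float32 parameter dtype
# (with a warning) before log_prob is evaluated.
import paddle
from paddle.distribution import Uniform

u = Uniform(low=0.0, high=1.0)               # float32 parameters
v = paddle.to_tensor([0.25], dtype='float64')
lp = u.log_prob(v)                           # warns and casts v to float32
print(lp.dtype)                              # float32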
Example No. 11
    def probs(self, value):
        """Probability density/mass function.

        Args:
          value (Tensor): The input tensor.

        Returns:
          Tensor: probability. The data type is the same as ``value``.

        """
        value = self._check_values_dtype_in_probs(self.low, value)
        if _non_static_mode():
            lb_bool = self.low < value
            ub_bool = value < self.high

            lb = _C_ops.cast(lb_bool, 'in_dtype', lb_bool.dtype, 'out_dtype',
                             value.dtype)
            ub = _C_ops.cast(ub_bool, 'in_dtype', ub_bool.dtype, 'out_dtype',
                             value.dtype)
            return (lb * ub) / (self.high - self.low)

        name = self.name + '_probs'
        lb_bool = self.low < value
        ub_bool = value < self.high
        lb = tensor.cast(lb_bool, dtype=value.dtype)
        ub = tensor.cast(ub_bool, dtype=value.dtype)
        return elementwise_div((lb * ub), (self.high - self.low), name=name)
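A quick consistency sketch: inside the support, ``probs`` should equal ``exp(log_prob)``, which ties this method to the one in Example No. 3.

# Sketch: probs(value) and exp(log_prob(value)) agree inside (low, high).
import paddle
from paddle.distribution import Uniform

u = Uniform(low=0.0, high=4.0)
v = paddle.to_tensor([1.0, 3.0])
print(u.probs(v))                 # [0.25, 0.25]
print(paddle.exp(u.log_prob(v)))  # [0.25, 0.25]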
Example No. 12
    def kl_divergence(self, other):
        """The KL-divergence between two Categorical distributions.

        Args:
            other (Categorical): instance of Categorical. The data type is float32.

        Returns:
            Tensor: kl-divergence between two Categorical distributions.

        Examples:
            .. code-block:: python

                import paddle
                from paddle.distribution import Categorical

                paddle.seed(100) # on CPU device
                x = paddle.rand([6])
                print(x)
                # [0.5535528  0.20714243 0.01162981
                #  0.51577556 0.36369765 0.2609165 ]

                paddle.seed(200) # on CPU device
                y = paddle.rand([6])
                print(y)
                # [0.77663314 0.90824795 0.15685187
                #  0.04279523 0.34468332 0.7955718 ]

                cat = Categorical(x)
                cat2 = Categorical(y)

                cat.kl_divergence(cat2)
                # [0.071952]

        """
        name = self.name + '_kl_divergence'
        if not _non_static_mode():
            check_type(other, 'other', Categorical, 'kl_divergence')

        logits = self.logits - \
            paddle.max(self.logits, axis=-1, keepdim=True)
        other_logits = other.logits - paddle.max(
            other.logits, axis=-1, keepdim=True)
        e_logits = ops.exp(logits)
        other_e_logits = ops.exp(other_logits)
        z = paddle.sum(e_logits, axis=-1, keepdim=True)
        other_z = paddle.sum(other_e_logits, axis=-1, keepdim=True)
        prob = e_logits / z
        kl = paddle.sum(
            prob *
            (logits - paddle.log(z) - other_logits + paddle.log(other_z)),
            axis=-1,
            keepdim=True,
            name=name)

        return kl
Example No. 13
    def sample(self, shape):
        """Generate samples of the specified shape.

        Args:
            shape (list): Shape of the generated samples.

        Returns:
            Tensor: A tensor whose leading dimensions are the sample ``shape``.

        Examples:
            .. code-block:: python

                import paddle
                from paddle.distribution import Categorical

                paddle.seed(100) # on CPU device
                x = paddle.rand([6])
                print(x)
                # [0.5535528  0.20714243 0.01162981
                #  0.51577556 0.36369765 0.2609165 ]

                cat = Categorical(x)

                paddle.seed(1000) # on CPU device
                cat.sample([2,3])
                # [[0, 0, 5],
                #  [3, 4, 5]]

        """
        name = self.name + '_sample'
        if not _non_static_mode():
            check_type(shape, 'shape', (list), 'sample')

        num_samples = np.prod(np.array(shape))

        logits_shape = list(self.logits.shape)
        if len(logits_shape) > 1:
            sample_shape = shape + logits_shape[:-1]
            logits = paddle.reshape(
                self.logits, [np.prod(logits_shape[:-1]), logits_shape[-1]])
        else:
            sample_shape = shape
            logits = self.logits

        sample_index = multinomial(self._logits_to_probs(logits), num_samples,
                                   True)

        # multinomial's sample shape is (logits.shape[:-1], num_samples); it needs
        # to be transposed to (num_samples, logits.shape[:-1])
        permute = list(range(sample_index.dim()))
        permute.insert(0, permute.pop(-1))
        sample_index = sample_index.transpose(permute)

        return paddle.reshape(sample_index, sample_shape, name=name)
Example No. 14
    def _append_optimize_op(self, block, param_and_grad):
        assert isinstance(block, framework.Block)

        sum_1 = self._get_accumulator('sum_1', param_and_grad[0])
        sum_2 = self._get_accumulator('sum_2', param_and_grad[0])
        sum_3 = self._get_accumulator('sum_3', param_and_grad[0])
        num_accumulates = self._get_accumulator('num_accumulates',
                                                param_and_grad[0])
        old_num_accumulates = self._get_accumulator('old_num_accumulates',
                                                    param_and_grad[0])
        num_updates = self._get_accumulator('num_updates', param_and_grad[0])
        if framework._non_static_mode():
            _, _, _, _, _, _ = _C_ops.average_accumulates(
                param_and_grad[0], sum_1, sum_2, sum_3, num_accumulates,
                old_num_accumulates, num_updates, sum_1, sum_2, sum_3,
                num_accumulates, old_num_accumulates, num_updates,
                'average_window', self.average_window, 'min_average_window',
                self.min_average_window, 'max_average_window',
                self.max_average_window)
            return None

        block = framework.default_main_program().global_block()
        attrs = {
            "average_window": self.average_window,
            "min_average_window": self.min_average_window,
            "max_average_window": self.max_average_window,
        }

        inputs = {
            "param": param_and_grad[0],
            "in_sum_1": sum_1,
            "in_sum_2": sum_2,
            "in_sum_3": sum_3,
            "in_num_accumulates": num_accumulates,
            "in_old_num_accumulates": old_num_accumulates,
            "in_num_updates": num_updates
        }

        outputs = {
            "out_sum_1": sum_1,
            "out_sum_2": sum_2,
            "out_sum_3": sum_3,
            "out_num_accumulates": num_accumulates,
            "out_old_num_accumulates": old_num_accumulates,
            "out_num_updates": num_updates,
        }

        average_accumulates_op = block.append_op(type=self.type,
                                                 inputs=inputs,
                                                 outputs=outputs,
                                                 attrs=attrs,
                                                 stop_gradient=True)

        return average_accumulates_op
Example No. 15
def _tuple_to_tensor(obj, return_numpy):
    if return_numpy:
        return obj[1]
    if _non_static_mode():
        t = paddle.to_tensor(obj[1])
        # Note: this function modifies the name of the returned tensor. Loading
        # the same variable multiple times may produce tensors with the same name.
        t.name = obj[0]
        return t
    else:
        return _to_LodTensor(obj[1])
Example No. 16
    def minimize(self,
                 loss,
                 startup_program=None,
                 parameters=None,
                 no_grad_set=None):
        """
        Add operations to minimize ``loss`` by updating ``parameters``.
        
        Args:
            loss (Tensor): A ``Tensor`` containing the value to minimize.
            startup_program (Program, optional): :ref:`api_fluid_Program` for
                initializing parameters in ``parameters``. The default value
                is None, at this time :ref:`api_fluid_default_startup_program` will be used.
            parameters (list, optional): List of ``Tensor`` or ``Tensor.name`` to update
                to minimize ``loss``. The default value is None, at this time all parameters
                will be updated.
            no_grad_set (set, optional): Set of ``Tensor``  or ``Tensor.name`` that don't need
                to be updated. The default value is None.
        
        Returns:
            tuple: A tuple ``(optimize_ops, params_grads)``: a list of operators appended
            by minimize and a list of ``(param, grad)`` tensor pairs, where ``param`` is a
            ``Parameter`` and ``grad`` is the gradient value corresponding to that parameter.
            In static graph mode, the returned tuple can be passed to ``fetch_list`` in ``Executor.run()`` to
            indicate program pruning. If so, the program will be pruned by ``feed`` and
            ``fetch_list`` before running; see details in ``Executor``.
        
        Examples:
        
            .. code-block:: python

                import paddle
                import numpy as np
                inp = paddle.to_tensor(np.random.random([1, 10]).astype('float32'))
                linear = paddle.nn.Linear(10, 1)
                out = linear(inp)
                loss = paddle.mean(out)
                loss.backward()

                sgd = paddle.optimizer.SGD(learning_rate=0.1,parameters=linear.parameters())
                sgd.minimize(loss)

                modelaverage = paddle.incubate.ModelAverage(0.15,
                                                            parameters=linear.parameters(),
                                                            min_average_window=2,
                                                            max_average_window=4)
                modelaverage.minimize(loss)
                sgd.clear_grad()
                modelaverage.clear_grad()

        """
        if framework._non_static_mode():
            self.step()
Example No. 17
    def _append_optimize_op(self, block, param_and_grad):
        assert isinstance(block, framework.Block)

        velocity_acc = self._get_accumulator(self._velocity_acc_str,
                                             param_and_grad[0])
        lr = self._create_param_lr(param_and_grad)

        find_master = self._multi_precision and param_and_grad[
            0].dtype == core.VarDesc.VarType.FP16
        master_weight = (self._master_weights[param_and_grad[0].name]
                         if find_master else None)

        if framework._non_static_mode():
            _, _, _ = _C_ops.momentum(
                param_and_grad[0], param_and_grad[1], velocity_acc, lr,
                master_weight, param_and_grad[0], velocity_acc, master_weight,
                'mu', self._momentum, 'use_nesterov', self._use_nesterov,
                'regularization_method', self._regularization_method,
                'regularization_coeff', self._regularization_coeff,
                'multi_precision', find_master)
            return None

        attrs = {
            "mu": self._momentum,
            "use_nesterov": self._use_nesterov,
            "regularization_method": self._regularization_method,
            "regularization_coeff": self._regularization_coeff,
            "multi_precision": find_master,
            "rescale_grad": self._rescale_grad
        }
        inputs = {
            "Param": [param_and_grad[0]],
            "Grad": [param_and_grad[1]],
            "Velocity": [velocity_acc],
            "LearningRate": [lr]
        }
        outputs = {
            "ParamOut": [param_and_grad[0]],
            "VelocityOut": [velocity_acc]
        }

        if find_master:
            inputs["MasterParam"] = master_weight
            outputs["MasterParamOut"] = master_weight

        # create the momentum optimize op
        momentum_op = block.append_op(type=self.type,
                                      inputs=inputs,
                                      outputs=outputs,
                                      attrs=attrs,
                                      stop_gradient=True)

        return momentum_op
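A minimal end-to-end sketch of the public optimizer that calls this hook; the linear layer and random input below are placeholders used only to produce a gradient.

# Sketch: one training step with paddle.optimizer.Momentum, which runs the
# momentum update implemented above.
import paddle

linear = paddle.nn.Linear(10, 1)
x = paddle.randn([4, 10])
loss = paddle.mean(linear(x))
loss.backward()

opt = paddle.optimizer.Momentum(learning_rate=0.1, momentum=0.9,
                                parameters=linear.parameters())
opt.step()
opt.clear_grad()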
Example No. 18
    def valid(self):
        if _non_static_mode():
            return False

        return self._run_env is not None and \
            self._platform is not None and \
            self._job_id is not None and \
            self._hdfs_home is not None and \
            self._hdfs_name is not None and \
            self._hdfs_ugi is not None and \
            self._hdfs_checkpoint_path is not None and \
            self._trainer_id is not None
Example No. 19
def fused_matmul_bias(x,
                      y,
                      bias=None,
                      transpose_x=False,
                      transpose_y=False,
                      name=None):
    """
    Applies matrix multiplication of two tensors and then bias addition if provided.
    This method requires CUDA version >= 11.6. 

    Args:
        x (Tensor): the first input Tensor to be multiplied.
        y (Tensor): the second input Tensor to be multiplied. Its rank must be 2.  
        bias (Tensor|None): the input bias Tensor. If it is None, no bias addition will
            be performed. Otherwise, the bias is added to the matrix multiplication result.
        transpose_x (bool): Whether to transpose :math:`x` before multiplication.
        transpose_y (bool): Whether to transpose :math:`y` before multiplication.
        name (str|None): For detailed information, please refer to
            :ref:`api_guide_Name`. Usually name does not need to be set and is None by default.

    Returns:
        Tensor: the output Tensor. 

    Examples:
        .. code-block:: python

            # required: gpu
            import paddle
            from paddle.incubate.nn.functional import fused_matmul_bias
            
            x = paddle.randn([3, 4]) 
            y = paddle.randn([4, 5])
            bias = paddle.randn([5])
            out = fused_matmul_bias(x, y, bias) 
            print(out.shape) # [3, 5]
    """
    if bias is None:
        return matmul(x, y, transpose_x, transpose_y, name)
    if _non_static_mode():
        return _C_ops.fused_gemm_epilogue(x, y, bias, 'trans_x', transpose_x,
                                          'trans_y', transpose_y)

    helper = LayerHelper('fused_matmul_bias', **locals())
    out = helper.create_variable_for_type_inference(dtype=x.dtype)
    helper.append_op(
        type='fused_gemm_epilogue',
        inputs={'X': x,
                'Y': y,
                'Bias': bias},
        outputs={'Out': out},
        attrs={'trans_x': transpose_x,
               'trans_y': transpose_y})
    return out
Example No. 20
def softmax_mask_fuse(x, mask, name=None):
    """
    Do a masked softmax on x.

    This is designed for speeding up Transformer structures.
    It fuses operations such as: tmp = x + mask, out = softmax(tmp).
    The equation is:

    .. math::
        out = softmax(x + mask)

    **Note**:
        This API only supports GPU.

    Args:
        x (4-D Tensor): The input tensor, should be in 4D shape, its data type should be float16 or float32.
                        The fourth dimension of x must be larger than or equal to 32 and less than 8192.
        mask (4-D Tensor): The input tensor, should be in 4D shape, its data type should be float16 or float32.
                           The second dimension of mask must be 1, and the other dimensions must be the same as x.
        name (str, optional): Name for the operation (optional, default is None).
                              For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        4-D Tensor. A location into which the result is stored. It is 4-D and has the same shape as x.

    Examples:
        .. code-block:: python

            # required: gpu
            import paddle
            import paddle.incubate as incubate

            x = paddle.rand([2, 8, 8, 32])
            mask = paddle.rand([2, 1, 8, 32])

            rst = incubate.softmax_mask_fuse(x, mask)
            # [[[[0.02404429, 0.04658398, 0.02746007, ..., 0.01489375, 0.02397441, 0.02851614] ... ]]]
    """
    if _non_static_mode():
        out = _C_ops.fused_softmax_mask(x, mask)
        return out
    helper = LayerHelper('fused_softmax_mask', **locals())
    out = helper.create_variable_for_type_inference(dtype=x.dtype)
    helper.append_op(type='fused_softmax_mask',
                     inputs={
                         'X': [x],
                         'Mask': [mask]
                     },
                     outputs={'Out': [out]})
    return out
Example No. 21
def softmax_mask_fuse_upper_triangle(x):
    """
    Do a masked softmax on x, which always masks the upper-triangular part of x.

    This is designed for speeding up GPT-style Transformer structures.
    It fuses operations such as: tmp = x + mask, out = softmax(tmp), where the mask
    is always an upper triangular matrix.
    The equation is:

    .. math::
        out = softmax(LowerTriangular(x))

    **Note**:
        This API only supports GPU.

    Args:
        x (4-D Tensor): The input tensor, should be in 4D shape, its data type should be float16 or float32.
                        The fourth dimension of x must be larger than or equal to 32 and less than 8192.
                        The third dimension of x must be the same as its fourth dimension.

    Returns:
        4-D Tensor. A location into which the result is stored. It is 4-D and has the same shape as x.

    Examples:
        .. code-block:: python

            # required: gpu
            import paddle
            import paddle.incubate as incubate

            x = paddle.rand((1, 1, 32, 32))

            rst = incubate.softmax_mask_fuse_upper_triangle(x)
            # [[[[1.        , 0.        , 0.        , ..., 0., 0., 0.],
            #    [0.45324376, 0.54675621, 0.        , ..., 0., 0., 0.],
            #    [0.32674268, 0.28156221, 0.39169508, ..., 0., 0., 0.]
            #     ... ]]]
    """
    if _non_static_mode():
        out = _C_ops.fused_softmax_mask_upper_triangle(x)
        return out

    helper = LayerHelper('fused_softmax_mask_upper_triangle', **locals())

    out = helper.create_variable_for_type_inference(dtype=x.dtype)

    helper.append_op(type='fused_softmax_mask_upper_triangle',
                     inputs={'X': [x]},
                     outputs={'Out': [out]})
    return out
Example No. 22
    def restore(self, executor=None):
        """
        Restore ``Parameter`` values of current model.
        
        Args:
            executor(Executor): The network executor in static graph mode. The default value is None in dygraph mode.

        Examples:

            .. code-block:: python

                import paddle
                import numpy as np
                inp = paddle.to_tensor(np.random.random([1, 10]).astype('float32'))
                linear = paddle.nn.Linear(10, 1)
                out = linear(inp)
                loss = paddle.mean(out)
                loss.backward()

                sgd = paddle.optimizer.SGD(learning_rate=0.1,parameters=linear.parameters())

                modelaverage = paddle.incubate.ModelAverage(0.15,
                                                            parameters=linear.parameters(),
                                                            min_average_window=2,
                                                            max_average_window=4)
                sgd.step()
                modelaverage.step()
                
                with modelaverage.apply(need_restore=False):
                    for param in linear.parameters():
                        print(param)

                for param in linear.parameters():
                    print(param)

                modelaverage.restore()

                for param in linear.parameters():
                    print(param)
        """
        if framework._non_static_mode():
            for param in self._parameter_list:
                param_restore = self._get_accumulator('restore', param)
                paddle.assign(param_restore, param)
            return
        if executor is None:
            raise RuntimeError(
                "Executor should not be None in static graph mode.")
        executor.run(self.restore_program)
Example No. 23
def param_guard(parameters):
    # Note: parameters is a reference of self._parameters or self._buffers
    if in_declarative_mode(
    ) and not framework._non_static_mode() and parameters:
        origin_parameters = parameters.copy()
        for name, var_base in parameters.items():
            if isinstance(var_base, list):
                new_var = [_convert_into_variable(var) for var in var_base]
            else:
                new_var = _convert_into_variable(var_base)
            parameters[name] = new_var
        yield
        parameters.update(origin_parameters)
    else:
        yield
Example No. 24
def record_program_ops_pre_hook(layer, inputs):
    """
    A pre-hook to mark op numbers before entering layer.forward.
    """
    if not _non_static_mode():
        if layer._op_recorder.start < 0:
            layer._op_recorder.start = len(
                default_main_program().current_block().ops)
            layer._op_recorder.is_valid = True
        else:
            layer._op_recorder.is_valid = False
            warnings.warn(
                "{} has recorded the op information before. Please check whether you call this layer twice."
                .format(layer._full_name))

    return None
Example No. 25
    def sample(self, shape, seed=0):
        """Generate samples of the specified shape.

        Args:
          shape (list): 1D `int32`. Shape of the generated samples.
          seed (int): Python integer number.

        Returns:
          Tensor: A tensor whose leading dimensions are the sample ``shape``. The data type is float32.

        """
        if not _non_static_mode():
            check_type(shape, 'shape', (list), 'sample')
            check_type(seed, 'seed', (int), 'sample')

        name = self.name + '_sample'
        batch_shape = list((self.low + self.high).shape)
        if self.batch_size_unknown:
            output_shape = shape + batch_shape
            zero_tmp = tensor.fill_constant_batch_size_like(
                self.low + self.high, batch_shape + shape, self.dtype, 0.)
            uniform_random_tmp = nn.uniform_random_batch_size_like(
                zero_tmp,
                zero_tmp.shape,
                dtype=self.dtype,
                min=0.,
                max=1.,
                seed=seed)
            zero_tmp_reshape = nn.reshape(zero_tmp, output_shape)
            uniform_random_tmp_reshape = nn.reshape(uniform_random_tmp,
                                                    output_shape)
            output = uniform_random_tmp_reshape * (zero_tmp_reshape +
                                                   self.high - self.low)
            output = elementwise_add(output, self.low, name=name)
            return output
        else:
            output_shape = shape + batch_shape
            output = nn.uniform_random(
                output_shape, dtype=self.dtype, min=0., max=1.,
                seed=seed) * (tensor.zeros(output_shape, dtype=self.dtype) +
                              (self.high - self.low))
            output = elementwise_add(output, self.low, name=name)
            if self.all_arg_is_float:
                return nn.reshape(output, shape, name=name)
            else:
                return output
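A usage sketch for sampling; the ``shape`` argument prepends sample dimensions, and with scalar ``low``/``high`` the result is reshaped to exactly ``shape``.

# Sketch: drawing samples from Uniform(low=-1, high=1).
import paddle
from paddle.distribution import Uniform

u = Uniform(low=-1.0, high=1.0)
samples = u.sample([5])
print(samples.shape)  # [5]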
Example No. 26
def _dirichlet(concentration, name=None):
    op_type = 'dirichlet'

    check_variable_and_dtype(concentration, 'concentration',
                             ['float32', 'float64'], op_type)

    if _non_static_mode():
        return paddle._C_ops.dirichlet(concentration)

    else:
        helper = LayerHelper(op_type, **locals())
        out = helper.create_variable_for_type_inference(
            dtype=concentration.dtype)
        helper.append_op(type=op_type,
                         inputs={"Alpha": concentration},
                         outputs={'Out': out},
                         attrs={})
        return out
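A usage sketch through the public class; the assumption is that ``paddle.distribution.Dirichlet.sample`` wraps this ``_dirichlet`` helper internally.

# Sketch: a Dirichlet sample lies on the probability simplex.
import paddle
from paddle.distribution import Dirichlet

d = Dirichlet(paddle.to_tensor([1.0, 2.0, 3.0]))
s = d.sample()
print(s)        # three non-negative entries
print(s.sum())  # ~1.0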
Example No. 27
    def kl_divergence(self, other):
        r"""The KL-divergence between two normal distributions.

        The KL divergence between the two distributions is

        .. math::

            KL\_divergence(\mu_0, \sigma_0; \mu_1, \sigma_1) = 0.5 (ratio^2 + (\frac{diff}{\sigma_1})^2 - 1 - 2 \ln {ratio})

        .. math::

            ratio = \frac{\sigma_0}{\sigma_1}
        
        .. math::

            diff = \mu_1 - \mu_0

        In the above equation:

        * :math:`loc = \mu_0`: is the mean of the current Normal distribution.
        * :math:`scale = \sigma_0`: is the std of the current Normal distribution.
        * :math:`loc = \mu_1`: is the mean of the other Normal distribution.
        * :math:`scale = \sigma_1`: is the std of the other Normal distribution.
        * :math:`ratio`: is the ratio of scales.
        * :math:`diff`: is the difference between means.

        Args:
            other (Normal): instance of Normal.

        Returns:
            Tensor: kl-divergence between two normal distributions. The data type is float32.

        """
        if not _non_static_mode():
            check_type(other, 'other', Normal, 'kl_divergence')

        name = self.name + '_kl_divergence'
        var_ratio = self.scale / other.scale
        var_ratio = (var_ratio * var_ratio)
        t1 = (self.loc - other.loc) / other.scale
        t1 = (t1 * t1)
        return elementwise_add(0.5 * var_ratio,
                               0.5 * (t1 - 1. - nn.log(var_ratio)),
                               name=name)
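A minimal usage sketch for this method with two scalar Normal distributions.

# Sketch: KL divergence between Normal(0, 1) and Normal(1, 2); the closed form
# above gives log(2) + (1 + 1) / (2 * 4) - 0.5, roughly 0.44.
import paddle
from paddle.distribution import Normal

p = Normal(loc=0.0, scale=1.0)
q = Normal(loc=1.0, scale=2.0)
print(p.kl_divergence(q))  # a float32 Tensor, ~0.44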
Example No. 28
    def forward(self,
                text,
                text_pair=None,
                do_lower_case=True,
                max_seq_len=-1,
                is_split_into_words=False,
                pad_to_max_seq_len=False):
        if _non_static_mode():
            input_ids, seg_ids = _C_ops.faster_tokenizer(
                self.vocab, text, text_pair, "do_lower_case", do_lower_case,
                "max_seq_len", max_seq_len, "pad_to_max_seq_len",
                pad_to_max_seq_len, "is_split_into_words", is_split_into_words)
            return input_ids, seg_ids

        attrs = {
            "do_lower_case": do_lower_case,
            "max_seq_len": max_seq_len,
            "pad_to_max_seq_len": pad_to_max_seq_len,
            "is_split_into_words": is_split_into_words,
        }
        helper = LayerHelper("faster_tokenizer")
        input_ids = helper.create_variable_for_type_inference(dtype="int64")
        seg_ids = helper.create_variable_for_type_inference(dtype="int64")
        if text_pair is None:
            helper.append_op(
                type='faster_tokenizer',
                inputs={'Vocab': self.vocab,
                        'Text': text},
                outputs={'InputIds': input_ids,
                         'SegmentIds': seg_ids},
                attrs=attrs)
        else:
            helper.append_op(
                type='faster_tokenizer',
                inputs={
                    'Vocab': self.vocab,
                    'Text': text,
                    'TextPair': text_pair
                },
                outputs={'InputIds': input_ids,
                         'SegmentIds': seg_ids},
                attrs=attrs)
        return input_ids, seg_ids
Example No. 29
    def forward(self, input, label, seq_length=None):
        if _non_static_mode():
            return _C_ops.chunk_eval(
                input, label, seq_length, "num_chunk_types",
                self.num_chunk_types, "chunk_scheme", self.chunk_scheme,
                "excluded_chunk_types", self.excluded_chunk_types or [])

        precision = self._helper.create_variable_for_type_inference(
            dtype="float32")
        recall = self._helper.create_variable_for_type_inference(
            dtype="float32")
        f1_score = self._helper.create_variable_for_type_inference(
            dtype="float32")
        num_infer_chunks = self._helper.create_variable_for_type_inference(
            dtype="int64")
        num_label_chunks = self._helper.create_variable_for_type_inference(
            dtype="int64")
        num_correct_chunks = self._helper.create_variable_for_type_inference(
            dtype="int64")

        this_input = {"Inference": [input], "Label": [label]}
        if seq_length is not None:
            this_input["SeqLength"] = [seq_length]

        self._helper.append_op(
            type='chunk_eval',
            inputs=this_input,
            outputs={
                "Precision": [precision],
                "Recall": [recall],
                "F1-Score": [f1_score],
                "NumInferChunks": [num_infer_chunks],
                "NumLabelChunks": [num_label_chunks],
                "NumCorrectChunks": [num_correct_chunks]
            },
            attrs={
                "num_chunk_types": self.num_chunk_types,
                "chunk_scheme": self.chunk_scheme,
                "excluded_chunk_types": self.excluded_chunk_types or []
            })
        return (precision, recall, f1_score, num_infer_chunks, num_label_chunks,
                num_correct_chunks)
Example No. 30
    def forward(self, input, label=None, length=None):
        if _non_static_mode():
            return _C_ops.crf_decoding(input, self._transition, label, length,
                                       "is_test", self._is_test)

        viterbi_path = self._helper.create_variable_for_type_inference(
            dtype=self._dtype)
        this_inputs = {
            "Emission": [input],
            "Transition": self._transition,
            "Label": label
        }
        if length is not None:
            this_inputs['Length'] = [length]
        self._helper.append_op(
            type='crf_decoding',
            inputs=this_inputs,
            outputs={"ViterbiPath": [viterbi_path]},
            attrs={"is_test": self._is_test, })
        return viterbi_path