def _kl_expfamily_expfamily(p, q):
    """Compute kl-divergence using `Bregman divergences
    <https://www.lix.polytechnique.fr/~nielsen/EntropyEF-ICIP2010.pdf>`_
    """
    if not type(p) == type(q):
        raise NotImplementedError

    p_natural_params = []
    for param in p._natural_parameters:
        param = param.detach()
        param.stop_gradient = False
        p_natural_params.append(param)

    q_natural_params = q._natural_parameters

    p_log_norm = p._log_normalizer(*p_natural_params)

    try:
        if _non_static_mode():
            p_grads = paddle.grad(p_log_norm,
                                  p_natural_params,
                                  create_graph=True)
        else:
            p_grads = paddle.static.gradients(p_log_norm, p_natural_params)
    except RuntimeError as e:
        raise TypeError(
            "Can't compute kl_divergence({cls_p}, {cls_q}) using Bregman divergence. Please register_kl({cls_p}, {cls_q})."
            .format(cls_p=type(p).__name__, cls_q=type(q).__name__)) from e

    kl = q._log_normalizer(*q_natural_params) - p_log_norm
    for p_param, q_param, p_grad in zip(p_natural_params, q_natural_params,
                                        p_grads):
        term = (q_param - p_param) * p_grad
        kl -= _sum_rightmost(term, len(q.event_shape))

    return kl
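# Usage sketch (illustrative only, not part of the original source): for two
# exponential-family distributions exposing `_natural_parameters` and
# `_log_normalizer`, the Bregman-based fallback above can be exercised directly
# in dynamic graph mode. This assumes paddle.distribution.Beta subclasses
# ExponentialFamily; note that paddle.distribution.kl_divergence(p, q) would
# prefer an explicitly registered pairwise rule if one exists.
#
#     import paddle
#     from paddle.distribution import Beta
#
#     p = Beta(paddle.to_tensor(0.5), paddle.to_tensor(0.5))
#     q = Beta(paddle.to_tensor(0.3), paddle.to_tensor(0.7))
#     print(_kl_expfamily_expfamily(p, q))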
def enabled():
    """
    This function checks whether the program runs in dynamic graph mode or not.
    You can enter dynamic graph mode with :ref:`api_fluid_dygraph_guard` api,
    or enable and disable dynamic graph mode with :ref:`api_fluid_dygraph_enable_dygraph`
    and :ref:`api_fluid_dygraph_disable_dygraph` api.

    **Note**:
        ``fluid.dygraph.enabled`` is the alias of ``fluid.in_dygraph_mode``, and
        ``fluid.in_dygraph_mode`` is recommended to use for now.

    Returns:
        bool: Whether the program is running in dynamic graph mode.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid

            fluid.enable_dygraph()  # Now we are in dygraph mode
            print(fluid.dygraph.enabled())  # True
            fluid.disable_dygraph()
            print(fluid.dygraph.enabled())  # False
    """
    # TODO(jiabin): Make this check as in_dygraph_mode when we support default eager mode.
    return framework._non_static_mode()
def log_prob(self, value):
    """Log probability density/mass function.

    Args:
        value (Tensor): The input tensor.

    Returns:
        Tensor: log probability. The data type is the same as value.

    """
    value = self._check_values_dtype_in_probs(self.low, value)
    if _non_static_mode():
        # ensure value in [low, high]
        lb_bool = self.low < value
        ub_bool = value < self.high

        lb = _C_ops.cast(lb_bool, 'in_dtype', lb_bool.dtype, 'out_dtype',
                         value.dtype)
        ub = _C_ops.cast(ub_bool, 'in_dtype', ub_bool.dtype, 'out_dtype',
                         value.dtype)
        return nn.log(lb * ub) - nn.log(self.high - self.low)

    name = self.name + '_log_prob'
    lb_bool = self.low < value
    ub_bool = value < self.high
    lb = tensor.cast(lb_bool, dtype=value.dtype)
    ub = tensor.cast(ub_bool, dtype=value.dtype)
    return elementwise_sub(nn.log(lb * ub),
                           nn.log(self.high - self.low),
                           name=name)
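# Usage sketch (illustrative only, not part of the original source): values
# inside [low, high) receive log(1) - log(high - low); values outside receive
# log(0), i.e. -inf.
#
#     import paddle
#
#     u = paddle.distribution.Uniform(low=0., high=2.)
#     print(u.log_prob(paddle.to_tensor([0.5, 1.5])))  # approx. [-0.6931, -0.6931]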
def _ndarray_to_tensor(obj, return_numpy):
    if return_numpy:
        return obj
    if _non_static_mode():
        return paddle.to_tensor(obj)
    else:
        return _to_LodTensor(obj)
def _parse_load_result(obj, return_numpy):

    def is_layer(obj):
        return isinstance(obj, fluid.Layer)

    def parse_layer(obj):
        temp_dict = _parse_load_result(obj.__dict__, False)
        obj.__dict__.update(temp_dict)
        return obj

    if _contain_x(obj, is_layer):
        if not _non_static_mode():
            raise ValueError(
                "Layer can only be loaded in dynamic graph mode, but now in static graph mode."
            )
        _parse_every_object(obj, is_layer, parse_layer)

    def tuple_to_tensor(obj):
        return _tuple_to_tensor(obj, return_numpy=return_numpy)

    def ndarray_to_tensor(obj):
        return _ndarray_to_tensor(obj, return_numpy=return_numpy)

    # tuple(name, ndarray) was converted from varbase of paddle2.1,
    # and all tuple(name, ndarray) are converted to tensor.
    if _contain_x(obj, _transformed_from_varbase):
        return _parse_every_object(obj, _transformed_from_varbase,
                                   tuple_to_tensor)
    # If there is no tuple(name, ndarray), it is considered to be saved by
    # paddle2.0 or converted from LoDTensor, and all ndarrays are converted
    # to tensor.
    else:
        return _parse_every_object(obj, _transformed_from_lodtensor,
                                   ndarray_to_tensor)
def forward(self, input, label, length=None):
    if _non_static_mode():
        _, _, _, log_likelihood = _C_ops.linear_chain_crf(
            input, self._transition, label, length, "is_test", self._is_test)
        return log_likelihood

    alpha = self._helper.create_variable_for_type_inference(
        dtype=self._dtype)
    emission_exps = self._helper.create_variable_for_type_inference(
        dtype=self._dtype)
    transition_exps = self._helper.create_variable_for_type_inference(
        dtype=self._dtype)
    log_likelihood = self._helper.create_variable_for_type_inference(
        dtype=self._dtype)
    this_inputs = {
        "Emission": [input],
        "Transition": self._transition,
        "Label": [label]
    }
    if length is not None:
        this_inputs['Length'] = [length]
    self._helper.append_op(type='linear_chain_crf',
                           inputs=this_inputs,
                           outputs={
                               "Alpha": [alpha],
                               "EmissionExps": [emission_exps],
                               "TransitionExps": transition_exps,
                               "LogLikelihood": log_likelihood
                           },
                           attrs={"is_test": self._is_test})
    return log_likelihood
def __init__(self, logits, name=None):
    """
    Args:
        logits (list|tuple|numpy.ndarray|Tensor): The logits input of categorical distribution. The data type is float32 or float64.
        name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
    """
    if not _non_static_mode():
        check_type(logits, 'logits',
                   (np.ndarray, tensor.Variable, list, tuple), 'Categorical')

    self.name = name if name is not None else 'Categorical'
    self.dtype = 'float32'

    if self._validate_args(logits):
        self.logits = logits
        self.dtype = convert_dtype(logits.dtype)
    else:
        if isinstance(logits, np.ndarray) and str(
                logits.dtype) in ['float32', 'float64']:
            self.dtype = logits.dtype
        self.logits = self._to_tensor(logits)[0]
        if self.dtype != convert_dtype(self.logits.dtype):
            self.logits = tensor.cast(self.logits, dtype=self.dtype)
    dist_sum = paddle.sum(self.logits, axis=-1, keepdim=True)
    self._prob = self.logits / dist_sum
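# Usage sketch (illustrative only, not part of the original source): the
# constructor normalizes the last axis of `logits` by its sum, so the input
# behaves like unnormalized probabilities.
#
#     import paddle
#     from paddle.distribution import Categorical
#
#     cat = Categorical(paddle.to_tensor([0.2, 0.3, 0.5]))
#     print(cat.sample([4]))  # four indices drawn from {0, 1, 2}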
def prepare_context(strategy=None):
    '''
    :api_attr: imperative
    '''
    if strategy is None:
        strategy = ParallelStrategy()
        strategy.nranks = Env().nranks
        strategy.local_rank = Env().local_rank
        strategy.trainer_endpoints = Env().trainer_endpoints
        strategy.current_endpoint = Env().current_endpoint
    if strategy.nranks < 2:
        return
    assert framework._non_static_mode() is True, \
        "dygraph.prepare_context should be used with dygraph mode."
    place = framework._current_expected_place()
    assert place is not None, \
        "dygraph.prepare_context should be used in fluid.dygraph.guard(place) guard."
    if not parallel_helper._is_parallel_ctx_initialized():
        if isinstance(place, core.CUDAPlace):
            parallel_helper._set_parallel_ctx(
                core.NCCLParallelContext(strategy, place))
        elif isinstance(place, core.XPUPlace):
            parallel_helper._set_parallel_ctx(
                core.BKCLParallelContext(strategy, place))
        elif isinstance(place, core.NPUPlace):
            parallel_helper._set_parallel_ctx(
                core.HCCLParallelContext(strategy, place))
        else:
            # TODO(Yancey1989): add Gloo Parallel Context to support CPU parallel computation
            # Note: `assert "<message>"` never fails, so raise explicitly instead.
            raise AssertionError(
                "Only support CUDAPlace or XPUPlace or NPUPlace for now.")
        parallel_helper._init_parallel_ctx()
    return strategy
def entropy(self):
    """Calculate entropy using `Bregman divergence
    <https://www.lix.polytechnique.fr/~nielsen/EntropyEF-ICIP2010.pdf>`_
    """
    entropy_value = -self._mean_carrier_measure

    natural_parameters = []
    for parameter in self._natural_parameters:
        parameter = parameter.detach()
        parameter.stop_gradient = False
        natural_parameters.append(parameter)

    log_norm = self._log_normalizer(*natural_parameters)

    if _non_static_mode():
        grads = paddle.grad(log_norm.sum(),
                            natural_parameters,
                            create_graph=True)
    else:
        grads = paddle.static.gradients(log_norm.sum(), natural_parameters)

    entropy_value += log_norm
    for p, g in zip(natural_parameters, grads):
        entropy_value -= p * g

    return entropy_value
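# Hedged sketch (hypothetical subclass, not part of the original source):
# entropy() only needs `_natural_parameters`, `_log_normalizer` and
# `_mean_carrier_measure`. For an Exponential(rate) distribution the natural
# parameter is -rate, the log-normalizer is -log(-theta), and the carrier
# measure is constant, so the entropy should evaluate to 1 - log(rate).
#
#     import paddle
#     from paddle.distribution import ExponentialFamily
#
#     class _Exponential(ExponentialFamily):
#         def __init__(self, rate):
#             self.rate = rate
#             super().__init__(batch_shape=rate.shape)
#
#         @property
#         def _natural_parameters(self):
#             return (-self.rate, )
#
#         def _log_normalizer(self, theta):
#             return -paddle.log(-theta)
#
#         @property
#         def _mean_carrier_measure(self):
#             return 0.
#
#     print(_Exponential(paddle.to_tensor(2.)).entropy())  # approx. 1 - log(2) = 0.3069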
def _check_values_dtype_in_probs(self, param, value):
    """
    Log_prob and probs methods have input ``value``. If value's dtype is
    different from param's, convert value's dtype to be consistent with
    param's dtype.

    Args:
        param (Tensor): low and high in Uniform class, loc and scale in Normal class.
        value (Tensor): The input tensor.

    Returns:
        value (Tensor): Change value's dtype if value's dtype is different from param.
    """
    if _non_static_mode():
        if value.dtype != param.dtype and convert_dtype(
                value.dtype) in ['float32', 'float64']:
            warnings.warn(
                "dtype of input 'value' needs to be the same as parameters of distribution class. dtype of 'value' will be converted."
            )
            return _C_ops.cast(value, 'in_dtype', value.dtype, 'out_dtype',
                               param.dtype)
        return value

    check_variable_and_dtype(value, 'value', ['float32', 'float64'],
                             'log_prob')
    if value.dtype != param.dtype:
        warnings.warn(
            "dtype of input 'value' needs to be the same as parameters of distribution class. dtype of 'value' will be converted."
        )
        return tensor.cast(value, dtype=param.dtype)
    return value
def probs(self, value):
    """Probability density/mass function.

    Args:
        value (Tensor): The input tensor.

    Returns:
        Tensor: probability. The data type is the same as value.

    """
    value = self._check_values_dtype_in_probs(self.low, value)
    if _non_static_mode():
        lb_bool = self.low < value
        ub_bool = value < self.high

        lb = _C_ops.cast(lb_bool, 'in_dtype', lb_bool.dtype, 'out_dtype',
                         value.dtype)
        ub = _C_ops.cast(ub_bool, 'in_dtype', ub_bool.dtype, 'out_dtype',
                         value.dtype)
        return (lb * ub) / (self.high - self.low)

    name = self.name + '_probs'
    lb_bool = self.low < value
    ub_bool = value < self.high
    lb = tensor.cast(lb_bool, dtype=value.dtype)
    ub = tensor.cast(ub_bool, dtype=value.dtype)
    return elementwise_div((lb * ub), (self.high - self.low), name=name)
def kl_divergence(self, other):
    """The KL-divergence between two Categorical distributions.

    Args:
        other (Categorical): instance of Categorical. The data type is float32.

    Returns:
        Tensor: kl-divergence between two Categorical distributions.

    Examples:
        .. code-block:: python

            import paddle
            from paddle.distribution import Categorical

            paddle.seed(100) # on CPU device
            x = paddle.rand([6])
            print(x)
            # [0.5535528  0.20714243 0.01162981
            #  0.51577556 0.36369765 0.2609165 ]

            paddle.seed(200) # on CPU device
            y = paddle.rand([6])
            print(y)
            # [0.77663314 0.90824795 0.15685187
            #  0.04279523 0.34468332 0.7955718 ]

            cat = Categorical(x)
            cat2 = Categorical(y)

            cat.kl_divergence(cat2)
            # [0.071952]

    """
    name = self.name + '_kl_divergence'
    if not _non_static_mode():
        check_type(other, 'other', Categorical, 'kl_divergence')

    logits = self.logits - \
        paddle.max(self.logits, axis=-1, keepdim=True)
    other_logits = other.logits - paddle.max(
        other.logits, axis=-1, keepdim=True)
    e_logits = ops.exp(logits)
    other_e_logits = ops.exp(other_logits)
    z = paddle.sum(e_logits, axis=-1, keepdim=True)
    other_z = paddle.sum(other_e_logits, axis=-1, keepdim=True)
    prob = e_logits / z
    kl = paddle.sum(
        prob *
        (logits - paddle.log(z) - other_logits + paddle.log(other_z)),
        axis=-1,
        keepdim=True,
        name=name)

    return kl
def sample(self, shape):
    """Generate samples of the specified shape.

    Args:
        shape (list): Shape of the generated samples.

    Returns:
        Tensor: A tensor with prepended dimensions shape.

    Examples:
        .. code-block:: python

            import paddle
            from paddle.distribution import Categorical

            paddle.seed(100) # on CPU device
            x = paddle.rand([6])
            print(x)
            # [0.5535528  0.20714243 0.01162981
            #  0.51577556 0.36369765 0.2609165 ]

            cat = Categorical(x)

            paddle.seed(1000) # on CPU device
            cat.sample([2,3])
            # [[0, 0, 5],
            #  [3, 4, 5]]

    """
    name = self.name + '_sample'
    if not _non_static_mode():
        check_type(shape, 'shape', (list), 'sample')

    num_samples = np.prod(np.array(shape))

    logits_shape = list(self.logits.shape)
    if len(logits_shape) > 1:
        sample_shape = shape + logits_shape[:-1]
        logits = paddle.reshape(
            self.logits, [np.prod(logits_shape[:-1]), logits_shape[-1]])
    else:
        sample_shape = shape
        logits = self.logits

    sample_index = multinomial(self._logits_to_probs(logits), num_samples,
                               True)

    # multinomial sample shape is (logits.shape[:-1], num_samples), need to
    # transpose to (num_samples, logits.shape[:-1])
    permute = list(range(sample_index.dim()))
    permute.insert(0, permute.pop(-1))
    sample_index = sample_index.transpose(permute)

    return paddle.reshape(sample_index, sample_shape, name=name)
def _append_optimize_op(self, block, param_and_grad):
    assert isinstance(block, framework.Block)

    sum_1 = self._get_accumulator('sum_1', param_and_grad[0])
    sum_2 = self._get_accumulator('sum_2', param_and_grad[0])
    sum_3 = self._get_accumulator('sum_3', param_and_grad[0])
    num_accumulates = self._get_accumulator('num_accumulates',
                                            param_and_grad[0])
    old_num_accumulates = self._get_accumulator('old_num_accumulates',
                                                param_and_grad[0])
    num_updates = self._get_accumulator('num_updates', param_and_grad[0])

    if framework._non_static_mode():
        _, _, _, _, _, _ = _C_ops.average_accumulates(
            param_and_grad[0], sum_1, sum_2, sum_3, num_accumulates,
            old_num_accumulates, num_updates, sum_1, sum_2, sum_3,
            num_accumulates, old_num_accumulates, num_updates,
            'average_window', self.average_window, 'min_average_window',
            self.min_average_window, 'max_average_window',
            self.max_average_window)
        return None

    block = framework.default_main_program().global_block()
    attrs = {
        "average_window": self.average_window,
        "min_average_window": self.min_average_window,
        "max_average_window": self.max_average_window,
    }

    inputs = {
        "param": param_and_grad[0],
        "in_sum_1": sum_1,
        "in_sum_2": sum_2,
        "in_sum_3": sum_3,
        "in_num_accumulates": num_accumulates,
        "in_old_num_accumulates": old_num_accumulates,
        "in_num_updates": num_updates
    }

    outputs = {
        "out_sum_1": sum_1,
        "out_sum_2": sum_2,
        "out_sum_3": sum_3,
        "out_num_accumulates": num_accumulates,
        "out_old_num_accumulates": old_num_accumulates,
        "out_num_updates": num_updates,
    }

    average_accumulates_op = block.append_op(type=self.type,
                                             inputs=inputs,
                                             outputs=outputs,
                                             attrs=attrs,
                                             stop_gradient=True)

    return average_accumulates_op
def _tuple_to_tensor(obj, return_numpy):
    if return_numpy:
        return obj[1]
    if _non_static_mode():
        t = paddle.to_tensor(obj[1])
        # This function modifies the name of the returned tensor; loading the
        # same variable multiple times may otherwise produce duplicate names.
        t.name = obj[0]
        return t
    else:
        return _to_LodTensor(obj[1])
def minimize(self,
             loss,
             startup_program=None,
             parameters=None,
             no_grad_set=None):
    """
    Add operations to minimize ``loss`` by updating ``parameters``.

    Args:
        loss (Tensor): A ``Tensor`` containing the value to minimize.
        startup_program (Program, optional): :ref:`api_fluid_Program` for
            initializing parameters in ``parameters``. The default value
            is None, at this time :ref:`api_fluid_default_startup_program` will be used.
        parameters (list, optional): List of ``Tensor`` or ``Tensor.name`` to update
            to minimize ``loss``. The default value is None, at this time all parameters
            will be updated.
        no_grad_set (set, optional): Set of ``Tensor`` or ``Tensor.name`` that don't need
            to be updated. The default value is None.

    Returns:
        tuple: tuple (optimize_ops, params_grads), A list of operators appended
        by minimize and a list of (param, grad) tensor pairs, param is
        ``Parameter``, grad is the gradient value corresponding to the parameter.
        In static graph mode, the returned tuple can be passed to ``fetch_list`` in
        ``Executor.run()`` to indicate program pruning. If so, the program will be
        pruned by ``feed`` and ``fetch_list`` before run, see details in ``Executor``.

    Examples:
        .. code-block:: python

            import paddle
            import numpy as np
            inp = paddle.to_tensor(np.random.random([1, 10]).astype('float32'))
            linear = paddle.nn.Linear(10, 1)
            out = linear(inp)
            loss = paddle.mean(out)
            loss.backward()

            sgd = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters())
            sgd.minimize(loss)

            modelaverage = paddle.incubate.ModelAverage(0.15,
                                                        parameters=linear.parameters(),
                                                        min_average_window=2,
                                                        max_average_window=4)
            modelaverage.minimize(loss)
            sgd.clear_grad()
            modelaverage.clear_grad()

    """
    if framework._non_static_mode():
        self.step()
def _append_optimize_op(self, block, param_and_grad):
    assert isinstance(block, framework.Block)

    velocity_acc = self._get_accumulator(self._velocity_acc_str,
                                         param_and_grad[0])
    lr = self._create_param_lr(param_and_grad)

    find_master = self._multi_precision and param_and_grad[
        0].dtype == core.VarDesc.VarType.FP16
    master_weight = (self._master_weights[param_and_grad[0].name]
                     if find_master else None)

    if framework._non_static_mode():
        _, _, _ = _C_ops.momentum(
            param_and_grad[0], param_and_grad[1], velocity_acc, lr,
            master_weight, param_and_grad[0], velocity_acc, master_weight,
            'mu', self._momentum, 'use_nesterov', self._use_nesterov,
            'regularization_method', self._regularization_method,
            'regularization_coeff', self._regularization_coeff,
            'multi_precision', find_master)
        return None

    attrs = {
        "mu": self._momentum,
        "use_nesterov": self._use_nesterov,
        "regularization_method": self._regularization_method,
        "regularization_coeff": self._regularization_coeff,
        "multi_precision": find_master,
        "rescale_grad": self._rescale_grad
    }

    inputs = {
        "Param": [param_and_grad[0]],
        "Grad": [param_and_grad[1]],
        "Velocity": [velocity_acc],
        "LearningRate": [lr]
    }

    outputs = {
        "ParamOut": [param_and_grad[0]],
        "VelocityOut": [velocity_acc]
    }

    if find_master:
        inputs["MasterParam"] = master_weight
        outputs["MasterParamOut"] = master_weight

    # create the momentum optimize op
    momentum_op = block.append_op(type=self.type,
                                  inputs=inputs,
                                  outputs=outputs,
                                  attrs=attrs,
                                  stop_gradient=True)

    return momentum_op
def valid(self):
    if _non_static_mode():
        return False

    return self._run_env is not None and \
        self._platform is not None and \
        self._job_id is not None and \
        self._hdfs_home is not None and \
        self._hdfs_name is not None and \
        self._hdfs_ugi is not None and \
        self._hdfs_checkpoint_path is not None and \
        self._trainer_id is not None
def fused_matmul_bias(x,
                      y,
                      bias=None,
                      transpose_x=False,
                      transpose_y=False,
                      name=None):
    """
    Applies matrix multiplication of two tensors and then bias addition if provided.
    This method requires CUDA version >= 11.6.

    Args:
        x (Tensor): the first input Tensor to be multiplied.
        y (Tensor): the second input Tensor to be multiplied. Its rank must be 2.
        bias (Tensor|None): the input bias Tensor. If it is None, no bias addition would
            be performed. Otherwise, the bias is added to the matrix multiplication result.
        transpose_x (bool): Whether to transpose :math:`x` before multiplication.
        transpose_y (bool): Whether to transpose :math:`y` before multiplication.
        name (str|None): For detailed information, please refer to
            :ref:`api_guide_Name`. Usually the name does not need to be set and is
            None by default.

    Returns:
        Tensor: the output Tensor.

    Examples:
        .. code-block:: python

            # required: gpu
            import paddle
            from paddle.incubate.nn.functional import fused_matmul_bias

            x = paddle.randn([3, 4])
            y = paddle.randn([4, 5])
            bias = paddle.randn([5])
            out = fused_matmul_bias(x, y, bias)
            print(out.shape) # [3, 5]
    """
    if bias is None:
        return matmul(x, y, transpose_x, transpose_y, name)
    if _non_static_mode():
        return _C_ops.fused_gemm_epilogue(x, y, bias, 'trans_x', transpose_x,
                                          'trans_y', transpose_y)

    helper = LayerHelper('fused_matmul_bias', **locals())
    out = helper.create_variable_for_type_inference(dtype=x.dtype)
    helper.append_op(type='fused_gemm_epilogue',
                     inputs={'X': x,
                             'Y': y,
                             'Bias': bias},
                     outputs={'Out': out},
                     attrs={'trans_x': transpose_x,
                            'trans_y': transpose_y})
    return out
def softmax_mask_fuse(x, mask, name=None):
    """
    Do a masked softmax on x.

    This is designed for speeding up Transformer structure.
    Used for reducing operations such as: tmp = x + mask, out = softmax(tmp).
    The equation is:

    .. math::
        out = softmax(x + mask)

    **Note**:
        This API only supports GPU.

    Args:
        x (4-D Tensor): The input tensor, should be in 4D shape, its data type should be
            float16 or float32. The fourth dimension of x must be larger or equal to 32
            and less than 8192.
        mask (4-D Tensor): The input tensor, should be in 4D shape, its data type should be
            float16 or float32. The second dimension of mask must be 1, and the other
            dimensions must be the same as x.
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        4-D Tensor. A location into which the result is stored. Its dimension is 4D.
        Has the same shape as x.

    Examples:
        .. code-block:: python

            # required: gpu
            import paddle
            import paddle.incubate as incubate

            x = paddle.rand([2, 8, 8, 32])
            mask = paddle.rand([2, 1, 8, 32])

            rst = incubate.softmax_mask_fuse(x, mask)
            # [[[[0.02404429, 0.04658398, 0.02746007, ..., 0.01489375, 0.02397441, 0.02851614] ... ]]]
    """
    if _non_static_mode():
        out = _C_ops.fused_softmax_mask(x, mask)
        return out
    helper = LayerHelper('fused_softmax_mask', **locals())
    out = helper.create_variable_for_type_inference(dtype=x.dtype)
    helper.append_op(type='fused_softmax_mask',
                     inputs={'X': [x],
                             'Mask': [mask]},
                     outputs={'Out': [out]})
    return out
def softmax_mask_fuse_upper_triangle(x):
    """
    Do a masked softmax on x, which always masks the upper-triangle part of x.

    This is designed for speeding up GPT-kind Transformer structure.
    Used for reducing operations such as: tmp = x + mask, out = softmax(tmp),
    where the mask is always an upper-triangle matrix.
    The equation is:

    .. math::
        out = softmax(LowerTriangular(x))

    **Note**:
        This API only supports GPU.

    Args:
        x (4-D Tensor): The input tensor, should be in 4D shape, its data type should be
            float16 or float32. The fourth dimension of x must be larger or equal to 32
            and less than 8192. The third dimension of x must be the same as the fourth
            dimension of x.

    Returns:
        4-D Tensor. A location into which the result is stored. Its dimension is 4D.
        Has the same shape as x.

    Examples:
        .. code-block:: python

            # required: gpu
            import paddle
            import paddle.incubate as incubate

            x = paddle.rand((1, 1, 32, 32))

            rst = incubate.softmax_mask_fuse_upper_triangle(x)
            # [[[[1.        , 0.        , 0.        , ..., 0., 0., 0.],
            #    [0.45324376, 0.54675621, 0.        , ..., 0., 0., 0.],
            #    [0.32674268, 0.28156221, 0.39169508, ..., 0., 0., 0.]
            #     ... ]]]
    """
    if _non_static_mode():
        out = _C_ops.fused_softmax_mask_upper_triangle(x)
        return out

    helper = LayerHelper('fused_softmax_mask_upper_triangle', **locals())
    out = helper.create_variable_for_type_inference(dtype=x.dtype)

    helper.append_op(type='fused_softmax_mask_upper_triangle',
                     inputs={'X': [x]},
                     outputs={'Out': [out]})
    return out
def restore(self, executor=None):
    """
    Restore ``Parameter`` values of current model.

    Args:
        executor (Executor): The network executor in static graph mode. The
            default value is None in dygraph mode.

    Examples:
        .. code-block:: python

            import paddle
            import numpy as np
            inp = paddle.to_tensor(np.random.random([1, 10]).astype('float32'))
            linear = paddle.nn.Linear(10, 1)
            out = linear(inp)
            loss = paddle.mean(out)
            loss.backward()

            sgd = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters())

            modelaverage = paddle.incubate.ModelAverage(0.15,
                                                        parameters=linear.parameters(),
                                                        min_average_window=2,
                                                        max_average_window=4)
            sgd.step()
            modelaverage.step()

            with modelaverage.apply(need_restore=False):
                for param in linear.parameters():
                    print(param)

            for param in linear.parameters():
                print(param)

            modelaverage.restore()

            for param in linear.parameters():
                print(param)
    """
    if framework._non_static_mode():
        for param in self._parameter_list:
            param_restore = self._get_accumulator('restore', param)
            paddle.assign(param_restore, param)
        return
    if executor is None:
        raise RuntimeError(
            "Executor should not be None in static graph mode.")
    executor.run(self.restore_program)
def param_guard(parameters):
    # Note: parameters is a reference of self._parameters or self._buffers
    if in_declarative_mode(
    ) and not framework._non_static_mode() and parameters:
        origin_parameters = parameters.copy()
        for name, var_base in parameters.items():
            if isinstance(var_base, list):
                new_var = [_convert_into_variable(var) for var in var_base]
            else:
                new_var = _convert_into_variable(var_base)
            parameters[name] = new_var
        yield
        parameters.update(origin_parameters)
    else:
        yield
def record_program_ops_pre_hook(layer, inputs):
    """
    A pre-hook to mark op numbers before entering layer.forward.
    """
    if not _non_static_mode():
        if layer._op_recorder.start < 0:
            layer._op_recorder.start = len(
                default_main_program().current_block().ops)
            layer._op_recorder.is_valid = True
        else:
            layer._op_recorder.is_valid = False
            warnings.warn(
                "{} has recorded the op information before. Please check whether you call this layer twice."
                .format(layer._full_name))

    return None
def sample(self, shape, seed=0):
    """Generate samples of the specified shape.

    Args:
        shape (list): 1D `int32`. Shape of the generated samples.
        seed (int): Python integer number.

    Returns:
        Tensor: A tensor with prepended dimensions shape. The data type is float32.

    """
    if not _non_static_mode():
        check_type(shape, 'shape', (list), 'sample')
        check_type(seed, 'seed', (int), 'sample')

    name = self.name + '_sample'
    batch_shape = list((self.low + self.high).shape)
    if self.batch_size_unknown:
        output_shape = shape + batch_shape
        zero_tmp = tensor.fill_constant_batch_size_like(
            self.low + self.high, batch_shape + shape, self.dtype, 0.)
        uniform_random_tmp = nn.uniform_random_batch_size_like(
            zero_tmp,
            zero_tmp.shape,
            dtype=self.dtype,
            min=0.,
            max=1.,
            seed=seed)
        zero_tmp_reshape = nn.reshape(zero_tmp, output_shape)
        uniform_random_tmp_reshape = nn.reshape(uniform_random_tmp,
                                                output_shape)
        output = uniform_random_tmp_reshape * (
            zero_tmp_reshape + self.high - self.low)
        output = elementwise_add(output, self.low, name=name)
        return output
    else:
        output_shape = shape + batch_shape
        output = nn.uniform_random(
            output_shape, dtype=self.dtype, min=0., max=1.,
            seed=seed) * (tensor.zeros(output_shape, dtype=self.dtype) +
                          (self.high - self.low))
        output = elementwise_add(output, self.low, name=name)
        if self.all_arg_is_float:
            return nn.reshape(output, shape, name=name)
        else:
            return output
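# Usage sketch (illustrative only, not part of the original source): with scalar
# float bounds, `all_arg_is_float` is True and the prepended sample shape is
# returned directly.
#
#     import paddle
#
#     u = paddle.distribution.Uniform(low=0., high=2.)
#     samples = u.sample([4])     # 4 values drawn uniformly from [0., 2.)
#     print(samples.shape)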
def _dirichlet(concentration, name=None):
    op_type = 'dirichlet'

    check_variable_and_dtype(concentration, 'concentration',
                             ['float32', 'float64'], op_type)

    if _non_static_mode():
        return paddle._C_ops.dirichlet(concentration)
    else:
        helper = LayerHelper(op_type, **locals())
        out = helper.create_variable_for_type_inference(
            dtype=concentration.dtype)
        helper.append_op(type=op_type,
                         inputs={"Alpha": concentration},
                         outputs={'Out': out},
                         attrs={})
        return out
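# Usage sketch (illustrative only, not part of the original source): this helper
# backs Dirichlet sampling; the public entry point is paddle.distribution.Dirichlet.
#
#     import paddle
#
#     d = paddle.distribution.Dirichlet(paddle.to_tensor([1., 2., 3.]))
#     s = d.sample()
#     print(s)  # 3 non-negative entries summing to 1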
def kl_divergence(self, other):
    r"""The KL-divergence between two normal distributions.

    The KL-divergence is computed as

    .. math::

        KL\_divergence(\mu_0, \sigma_0; \mu_1, \sigma_1) = 0.5 (ratio^2 + (\frac{diff}{\sigma_1})^2 - 1 - 2 \ln {ratio})

    .. math::

        ratio = \frac{\sigma_0}{\sigma_1}

    .. math::

        diff = \mu_1 - \mu_0

    In the above equation:

    * :math:`loc = \mu_0`: is the mean of current Normal distribution.
    * :math:`scale = \sigma_0`: is the std of current Normal distribution.
    * :math:`loc = \mu_1`: is the mean of other Normal distribution.
    * :math:`scale = \sigma_1`: is the std of other Normal distribution.
    * :math:`ratio`: is the ratio of scales.
    * :math:`diff`: is the difference between means.

    Args:
        other (Normal): instance of Normal.

    Returns:
        Tensor: kl-divergence between two normal distributions. The data type is float32.

    """
    if not _non_static_mode():
        check_type(other, 'other', Normal, 'kl_divergence')

    name = self.name + '_kl_divergence'
    var_ratio = self.scale / other.scale
    var_ratio = (var_ratio * var_ratio)
    t1 = (self.loc - other.loc) / other.scale
    t1 = (t1 * t1)
    return elementwise_add(0.5 * var_ratio,
                           0.5 * (t1 - 1. - nn.log(var_ratio)),
                           name=name)
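# Usage sketch (illustrative only, not part of the original source):
# KL(N(0, 1) || N(1, 2)) evaluated with the closed form above, i.e.
# log(2) + 2/8 - 0.5 = 0.4431.
#
#     import paddle
#
#     n0 = paddle.distribution.Normal(loc=0., scale=1.)
#     n1 = paddle.distribution.Normal(loc=1., scale=2.)
#     print(n0.kl_divergence(n1))  # approx. [0.4431]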
def forward(self,
            text,
            text_pair=None,
            do_lower_case=True,
            max_seq_len=-1,
            is_split_into_words=False,
            pad_to_max_seq_len=False):
    if _non_static_mode():
        input_ids, seg_ids = _C_ops.faster_tokenizer(
            self.vocab, text, text_pair, "do_lower_case", do_lower_case,
            "max_seq_len", max_seq_len, "pad_to_max_seq_len",
            pad_to_max_seq_len, "is_split_into_words", is_split_into_words)
        return input_ids, seg_ids

    attrs = {
        "do_lower_case": do_lower_case,
        "max_seq_len": max_seq_len,
        "pad_to_max_seq_len": pad_to_max_seq_len,
        "is_split_into_words": is_split_into_words,
    }
    helper = LayerHelper("faster_tokenizer")
    input_ids = helper.create_variable_for_type_inference(dtype="int64")
    seg_ids = helper.create_variable_for_type_inference(dtype="int64")
    if text_pair is None:
        helper.append_op(type='faster_tokenizer',
                         inputs={'Vocab': self.vocab,
                                 'Text': text},
                         outputs={'InputIds': input_ids,
                                  'SegmentIds': seg_ids},
                         attrs=attrs)
    else:
        helper.append_op(type='faster_tokenizer',
                         inputs={
                             'Vocab': self.vocab,
                             'Text': text,
                             'TextPair': text_pair
                         },
                         outputs={'InputIds': input_ids,
                                  'SegmentIds': seg_ids},
                         attrs=attrs)
    return input_ids, seg_ids
def forward(self, input, label, seq_length=None):
    if _non_static_mode():
        return _C_ops.chunk_eval(
            input, label, seq_length, "num_chunk_types",
            self.num_chunk_types, "chunk_scheme", self.chunk_scheme,
            "excluded_chunk_types", self.excluded_chunk_types or [])

    precision = self._helper.create_variable_for_type_inference(
        dtype="float32")
    recall = self._helper.create_variable_for_type_inference(
        dtype="float32")
    f1_score = self._helper.create_variable_for_type_inference(
        dtype="float32")
    num_infer_chunks = self._helper.create_variable_for_type_inference(
        dtype="int64")
    num_label_chunks = self._helper.create_variable_for_type_inference(
        dtype="int64")
    num_correct_chunks = self._helper.create_variable_for_type_inference(
        dtype="int64")

    this_input = {"Inference": [input], "Label": [label]}
    if seq_length is not None:
        this_input["SeqLength"] = [seq_length]

    self._helper.append_op(type='chunk_eval',
                           inputs=this_input,
                           outputs={
                               "Precision": [precision],
                               "Recall": [recall],
                               "F1-Score": [f1_score],
                               "NumInferChunks": [num_infer_chunks],
                               "NumLabelChunks": [num_label_chunks],
                               "NumCorrectChunks": [num_correct_chunks]
                           },
                           attrs={
                               "num_chunk_types": self.num_chunk_types,
                               "chunk_scheme": self.chunk_scheme,
                               "excluded_chunk_types":
                               self.excluded_chunk_types or []
                           })
    return (precision, recall, f1_score, num_infer_chunks, num_label_chunks,
            num_correct_chunks)
def forward(self, input, label=None, length=None):
    if _non_static_mode():
        return _C_ops.crf_decoding(input, self._transition, label, length,
                                   "is_test", self._is_test)

    viterbi_path = self._helper.create_variable_for_type_inference(
        dtype=self._dtype)
    this_inputs = {
        "Emission": [input],
        "Transition": self._transition,
        "Label": label
    }
    if length is not None:
        this_inputs['Length'] = [length]
    self._helper.append_op(type='crf_decoding',
                           inputs=this_inputs,
                           outputs={"ViterbiPath": [viterbi_path]},
                           attrs={"is_test": self._is_test})
    return viterbi_path