import cntk as C

def cross_entropy_with_full_softmax(
        output,         # Node providing the output of the lstm layers
        target_vector,  # Node providing the expected labels
        sv_dim,
        vocab_dim):
    sv_vector = output.outputs[3]
    z = output.outputs[0]
    zT = C.times_transpose(z, target_vector)
    # cross entropy loss with softmax function
    ce = -C.log(zT)
    # the error
    zMax = C.reduce_max(z)
    error = C.less(zT, zMax)
    ce = C.sequence.reduce_sum(ce)
    # discourages the network from turning more than one gate off in a single time step
    sumc = C.abs(C.sequence.slice(sv_vector, 1, 0) - C.sequence.slice(sv_vector, 0, -1))
    sumc = C.sequence.reduce_sum(0.0001 * C.pow(100.0, sumc))
    # penalise generated utterances that failed to render all the required slots
    sumc += C.abs(C.sequence.last(sv_vector))
    sumc += C.abs(C.sequence.first(sv_vector) - output.outputs[4])
    sumc = C.reduce_sum(sumc)
    ce = C.reduce_sum(ce)
    ce += sumc
    return ce, error
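# A small numeric illustration (not from the source) of how the gate penalty
# term above behaves: 0.0001 * 100 ** |delta| stays near 1e-4 while gate
# values change by at most a small amount per step, but grows steeply once
# more than one gate flips in a single time step.
for delta in (0.0, 0.5, 1.0, 2.0):
    print(delta, 0.0001 * 100.0 ** delta)
# 0.0 -> 0.0001, 0.5 -> 0.001, 1.0 -> 0.01, 2.0 -> 1.0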
def binary_focal_loss(output, target, alpha=1., gamma=2., name=''):
    """
    CNTK binary-class implementation of focal loss from "Focal Loss for Dense Object Detection"
    by Tsung-Yi Lin et al.

    Focal loss adds a factor (1 - p) ^ gamma to the standard cross entropy criterion.
    Setting gamma > 0 reduces the relative loss for well-classified examples (p > .5),
    putting more focus on hard, misclassified examples. Focal loss enables the training
    of highly accurate dense object detectors in the presence of vast numbers of easy
    background examples or datasets with extreme class imbalance (e.g. 1:1000).

    This implementation works for semantic segmentation of images, i.e. output can be
    a rank 2 tensor of shape (row, col). Output is correct even in the edge case where
    the entire image is background.

    Maths:
        Focal Loss = - alpha * (1 - p) ^ gamma * log ( p )

    Arguments:
        output: the computed posterior probability from the network (typ. a ``sigmoid``).
          Can be from shape (1,) for simple classification up to shape (row, col) for
          semantic segmentation of images.
        target: ground-truth label, 0 or 1
        alpha (float): scaling factor; weight assigned to rare classes. Should slightly
          decrease as gamma increases. (default 1)
        gamma (float): larger gamma reduces relative loss for well-classified examples.
          Recommended range [0.5, 5]. (default 2.)
        name (str, optional): the name of the Function instance in the network

    Returns:
        :class:`~cntk.ops.functions.Function`
    """
    logprobA = target * C.log(output)
    logprobB = (1 - target) * C.log(1 - output)
    factorA = C.pow(1 - output, gamma)
    factorB = C.pow(output, gamma)
    return C.negate(alpha * (factorA * logprobA + factorB * logprobB), name=name)
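# A minimal usage sketch (input shapes and values are assumptions, not from
# the source): binary focal loss over a sigmoid posterior for one example.
import numpy as np

p = C.input_variable(1)  # posterior probability from a sigmoid
y = C.input_variable(1)  # ground-truth label, 0 or 1
loss = binary_focal_loss(p, y, alpha=1., gamma=2.)
# a well-classified example (p=0.9, y=1) contributes far less than it
# would under plain binary cross entropy
print(loss.eval({p: np.array([[0.9]], dtype=np.float32),
                 y: np.array([[1.0]], dtype=np.float32)}))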
def focal_loss_with_softmax(output_vector, target_vector, alpha=1, gamma=2., axis=-1, name=''):
    """
    CNTK multi-class implementation of focal loss from "Focal Loss for Dense Object Detection"
    by Tsung-Yi Lin et al.

    Focal loss adds a factor (1 - p) ^ gamma to the standard cross entropy criterion.
    Setting gamma > 0 reduces the relative loss for well-classified examples (p > .5),
    putting more focus on hard, misclassified examples. Focal loss enables the training
    of highly accurate dense object detectors in the presence of vast numbers of easy
    background examples or datasets with extreme class imbalance (e.g. 1:1000).

    This implementation works for semantic segmentation of images, i.e. output can be
    a rank 3 tensor of shape (num_classes, row, col).

    Maths:
        Focal Loss = - alpha * (1 - p) ^ gamma * log ( p )

    Example:
        >>> Cx.focal_loss_with_softmax([[0, 0, 0.8, 0.2]], [[0, 0, 1, 0]]).eval()
        array([[0.31306446]], dtype=float32)

    Arguments:
        output_vector: the unscaled computed output values from the network. Can be
          from shape (num_classes,) for classification up to shape (num_classes, row, col)
          for semantic segmentation of images.
        target_vector: usually a one-hot vector where the hot bit corresponds to the
          label index, but it can be any probability distribution over the labels.
        alpha (float): scaling factor; weight assigned to rare classes. Should slightly
          decrease as gamma increases. (default 1)
        gamma (float): larger gamma reduces relative loss for well-classified examples.
          Recommended range [0.5, 5]. (default 2.)
        axis (int or :class:`~cntk.axis.Axis`, optional): if given, focal loss will be
          computed along this axis
        name (str, optional): the name of the Function instance in the network

    Returns:
        :class:`~cntk.ops.functions.Function`
    """
    prob = C.softmax(output_vector, axis=axis)
    log_prob = target_vector * C.log(prob)  # cross entropy with softmax
    factor = C.pow(1 - prob, gamma)
    return C.negate(alpha * C.reduce_sum(factor * log_prob, axis=axis), name=name)
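# A quick comparison (logit and label values are assumptions) showing that
# focal loss down-weights an easy, confidently-correct example relative to
# plain cross entropy with softmax:
logits = [[0., 0., 4., 0.]]   # confident on class 2
labels = [[0., 0., 1., 0.]]
print(focal_loss_with_softmax(logits, labels).eval())       # close to 0
print(C.cross_entropy_with_softmax(logits, labels).eval())  # noticeably larger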
from math import pi

def gaussian_mdn_phi(target, mu, sigma, ndim: int):
    """
    Calculates phi between the target tensor and the network prediction.
    Does not assume independence between components of target.

    Arguments:
        target: target tensor with shape (ndim, )
        mu: means of gaussian mdn with shape (nmix, ndim)
        sigma: sigma of gaussian mdn
        ndim (int): number of dimensions in gaussian

    Returns:
        :class:`~cntk.ops.functions.Function`
    """
    if not len(mu.shape) == 2:
        raise ValueError("mu {0} must have shape (nmix, ndim)".format(mu.shape))

    t = C.expand_dims(target, axis=0)
    exp_term = C.exp(C.negate(C.square(C.reduce_l2(t - mu, axis=-1)) / (2 * C.square(sigma))))
    factor = C.reciprocal((2 * pi) ** (ndim / 2) * C.pow(sigma, ndim))
    return factor * exp_term
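# A minimal evaluation sketch (shapes are assumptions: nmix=2, ndim=3; sigma
# is given shape (nmix, 1) so it broadcasts against the per-mixture distances):
import numpy as np

target = C.constant(np.zeros(3, dtype=np.float32))      # (ndim,)
mu = C.constant(np.zeros((2, 3), dtype=np.float32))     # (nmix, ndim)
sigma = C.constant(np.ones((2, 1), dtype=np.float32))   # one sigma per mixture
print(gaussian_mdn_phi(target, mu, sigma, ndim=3).eval())
# each component is a unit gaussian evaluated at its own mean:
# (2 * pi) ** (-3 / 2) ~= 0.0635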
def test_Pow(tmpdir):
    model = C.pow([1, 2, -2], [3, -2, 3])
    verify_no_input(model, tmpdir, 'Pow_0')
def test_Pow(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        model = C.pow(np.array([1, 2, -2]).astype(dtype),
                      np.array([3, -2, 3]).astype(dtype))
        verify_no_input(model, tmpdir, 'Pow_0')
import cntk

a = [1, 2, 3]
b = [4, 5, 6]

print("Tensor A = {}".format(a))
print("Tensor B = {}\n".format(b))

print("A+B:")
print("{}\n".format(cntk.plus(a, b).eval()))

print("A-B:")
print("{}\n".format(cntk.minus(a, b).eval()))

print("A*B:")
print("{}\n".format(cntk.element_times(a, b).eval()))

print("A/B:")
print("{}\n".format(cntk.element_divide(a, b).eval()))

print("A^B:")
print("{}\n".format(cntk.pow(a, b).eval()))

print("Min(A,B):")
print("{}\n".format(cntk.element_min(a, b).eval()))

print("Max(A,B):")
print("{}\n".format(cntk.element_max(a, b).eval()))
import numpy as np
import cntk as C

def gelu(x):
    # tanh approximation of the Gaussian Error Linear Unit
    return 0.5 * x * (1 + C.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * C.pow(x, 3))))
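# Quick sanity check (input values are assumptions): GELU is close to 0 for
# large negative inputs, close to x for large positive inputs, and 0 at 0.
x = C.input_variable(3)
print(gelu(x).eval({x: np.array([[-3., 0., 3.]], dtype=np.float32)}))
# roughly [-0.004, 0., 2.996]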