Example #1
def cross_entropy_with_full_softmax(
    output,  # Node providing the output of the LSTM layers
    target_vector,  # Node providing the expected labels
    sv_dim,  # (unused in this snippet)
    vocab_dim  # (unused in this snippet)
    ):
    sv_vector = output.outputs[3]
    z = output.outputs[0]
    zT = C.times_transpose(z, target_vector)
    # cross-entropy loss; z is the softmax output of the network
    ce = -C.log(zT)
    # classification error: 1 when the target's score is not the maximum
    zMax = C.reduce_max(z)
    error = C.less(zT, zMax)
    ce = C.sequence.reduce_sum(ce)
    # discourage the network from turning more than one gate off in a single time step
    sumc = C.abs(C.sequence.slice(sv_vector, 1, 0) - C.sequence.slice(sv_vector, 0, -1))
    sumc = C.sequence.reduce_sum(0.0001 * C.pow(100.0, sumc))
    # penalise generated utterances that fail to render all the required slots
    sumc += C.abs(C.sequence.last(sv_vector))
    sumc += C.abs(C.sequence.first(sv_vector) - output.outputs[4])
    sumc = C.reduce_sum(sumc)
    ce = C.reduce_sum(ce)
    ce += sumc
    return ce, error
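The term `0.0001 * C.pow(100.0, sumc)` above penalises each gate's per-step change exponentially: a gate that snaps shut in a single step costs far more than one that decays over several steps. A minimal NumPy sketch of just that penalty (the gate trajectories below are invented for illustration):

import numpy as np

# one reading gate over four time steps: gradual decay vs. an abrupt jump
gradual = np.array([1.0, 0.7, 0.4, 0.1])
abrupt = np.array([1.0, 1.0, 1.0, 0.1])

for sv in (gradual, abrupt):
    penalty = 0.0001 * 100.0 ** np.abs(sv[1:] - sv[:-1])  # per-step cost
    print(penalty.sum())  # ~0.0012 for gradual, ~0.0065 for abrupt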
Example #2
def binary_focal_loss(output, target, alpha=1., gamma=2., name=''):
    """
    CNTK binary class implementation of focal loss from "Focal Loss for Dense Object Detection" by Tsung-Yi Lin et al.

    Focal loss adds a factor (1 - p) ^ gamma to the standard cross entropy criterion. Setting gamma > 0 reduces the
    relative loss for well-classified examples (p > .5), putting more focus on hard, misclassified examples.
    Focal loss enables the training of highly accurate dense object detectors in the presence of vast
    numbers of easy background examples or datasets with extreme class imbalance (e.g. 1:1000).

    This implementation works for semantic segmentation of images, i.e. the output can
    be a rank 2 tensor of shape (row, col). The result is correct even in the edge case where the entire image is background.

    Maths:
        Focal Loss = - alpha * (1 - p) ^ gamma * log ( p )

    Arguments:
        output: the computed posterior probability from the network (typ. a ``sigmoid``). Can be
          from shape (1,) for simple classification up to shape (row, col) for semantic segmentation of images.
        target: ground-truth label, 0 or 1
        alpha (float): scaling factor; the weight assigned to rare classes.
          Should decrease slightly as gamma increases. (defaults 1)
        gamma (float): Larger gamma reduces relative loss for well-classified examples.
          Recommended range [0.5, 5] (Default 2.)
        name (str, optional): the name of the Function instance in the network

    Returns:
        :class:`~cntk.ops.functions.Function`

    """
    logprobA = target * C.log(output)
    logprobB = (1 - target) * C.log(1 - output)

    factorA = C.pow(1 - output, gamma)
    factorB = C.pow(output, gamma)

    return C.negate(alpha * (factorA * logprobA + factorB * logprobB), name=name)
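A hedged usage sketch (the values are invented for illustration): for a well-classified example, gamma=2 should scale the loss down by (1 - p)^2 relative to plain binary cross entropy.

import numpy as np
import cntk as C

p = C.constant(np.array([0.9], dtype=np.float32))  # predicted probability
y = C.constant(np.array([1.0], dtype=np.float32))  # ground-truth label

bce = C.negate(y * C.log(p) + (1 - y) * C.log(1 - p)).eval()
fl = binary_focal_loss(p, y, alpha=1., gamma=2.).eval()
print(bce, fl)  # fl ~ (1 - 0.9)^2 * bce, i.e. about 100x smaller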
Example #3
def focal_loss_with_softmax(output_vector, target_vector, alpha=1, gamma=2., axis=-1, name=''):
    """
    CNTK multi-class implementation of focal loss from "Focal Loss for Dense Object Detection" by Tsung-Yi Lin et al.

    Focal loss adds a factor (1 - p) ^ gamma to the standard cross entropy criterion. Setting gamma > 0 reduces the
    relative loss for well-classified examples (p > .5), putting more focus on hard, misclassified examples.
    Focal loss enables the training of highly accurate dense object detectors in the presence of vast
    numbers of easy background examples or datasets with extreme class imbalance (e.g. 1:1000).

    This implementation works for semantic segmentation of images, i.e. the output can
    be a tensor of shape (num_classes, row, col).

    Maths:
        Focal Loss = - alpha * (1 - p) ^ gamma * log ( p )

    Example:
        Cx.focal_loss_with_softmax([[0, 0, 0.8, 0.2]], [[0, 0, 1, 0]]).eval()
        array([[0.31306446]], dtype=float32)

    Arguments:
        output_vector: the unscaled computed output values from the network. Can be
          from shape (num_classes,) for classification up to shape (num_classes, row, col) for semantic segmentation
          of images.
        target_vector: usually a one-hot vector where the hot bit
         corresponds to the label index, but it can be any probability
         distribution over the labels.
        alpha (float): scaling factor; the weight assigned to rare classes.
          Should decrease slightly as gamma increases. (defaults 1)
        gamma (float): Larger gamma reduces relative loss for well-classified examples.
          Recommended range [0.5, 5] (Default 2.)
        axis (int or :class:`~cntk.axis.Axis`, optional): if given, focal loss will be computed
                along this axis
        name (str, optional): the name of the Function instance in the network

    Returns:
        :class:`~cntk.ops.functions.Function`

    """
    prob = C.softmax(output_vector, axis=axis)
    log_prob = target_vector * C.log(prob)  # cross entropy with softmax

    factor = C.pow(1 - prob, gamma)

    return C.negate(alpha * C.reduce_sum(factor * log_prob, axis=axis), name=name)
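A hedged comparison, reusing the numbers from the docstring example above: plain softmax cross entropy on the same prediction is roughly 0.895, and the (1 - p)^gamma factor (here p ≈ 0.41 for the true class) scales it down to the 0.313 shown.

import cntk as C

logits = [[0., 0., 0.8, 0.2]]
labels = [[0., 0., 1., 0.]]

ce = C.cross_entropy_with_softmax(logits, labels).eval()
fl = focal_loss_with_softmax(logits, labels).eval()
print(ce, fl)  # ~[[0.895]] vs ~[[0.313]]: the focal factor down-weights the loss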
Example #4
    def gaussian_mdn_phi(target, mu, sigma, ndim: int):
        """
        Calculates phi between the target tensor and the network prediction.
        Does not assume independence between the components of the target.

        Arguments:
            target: target tensor with shape (ndim, )
            mu: means of the gaussian mdn with shape (nmix, ndim)
            sigma: standard deviations of the gaussian mdn, one per mixture component
            ndim (int): number of dimensions in the gaussian

        Returns:
            :class:`~cntk.ops.functions.Function`
        """
        if len(mu.shape) != 2:
            raise ValueError("mu {0} must have shape (nmix, ndim)".format(mu.shape))

        t = C.expand_dims(target, axis=0)  # (1, ndim), broadcast against the nmix axis

        exp_term = C.exp(C.negate(C.square(C.reduce_l2(t - mu, axis=-1)) / (2 * C.square(sigma))))
        factor = C.reciprocal((2 * pi) ** (ndim / 2) * C.pow(sigma, ndim))  # pi from math.pi
        return factor * exp_term
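For reference, a hedged NumPy re-implementation of the same isotropic Gaussian density (the function name is introduced here for illustration; shapes follow the docstring):

import numpy as np

def gaussian_mdn_phi_np(target, mu, sigma, ndim):
    # squared L2 distance between the target and each mixture mean
    dist2 = np.sum((target[None, :] - mu) ** 2, axis=-1)
    # isotropic gaussian normalisation constant, one per mixture component
    factor = 1.0 / ((2 * np.pi) ** (ndim / 2) * sigma ** ndim)
    return factor * np.exp(-dist2 / (2 * sigma ** 2))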
Example #5
def test_Pow(tmpdir):
    model = C.pow([1, 2, -2], [3, -2, 3])
    verify_no_input(model, tmpdir, 'Pow_0')
Example #6
def test_Pow(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        model = C.pow(
            np.array([1, 2, -2]).astype(dtype),
            np.array([3, -2, 3]).astype(dtype))
        verify_no_input(model, tmpdir, 'Pow_0')
Example #7
File: app.py  Project: lcarli/CNTKSamples
import cntk

a = [1, 2, 3]
b = [4, 5, 6]
print("Tensor A = [1,2,3]")
print("Tensor B = [4,5,6]\n")

print("A+B:")
plus = cntk.plus(a, b).eval()
print("{}\n".format(plus))

print("A-B:")
minus = cntk.minus(a, b).eval()
print("{}\n".format(minus))

print("A*B (element-wise):")
times = cntk.element_times(a, b).eval()  # cntk.times would compute a dot product
print("{}\n".format(times))

print("A/B:")
divide = cntk.element_divide(a, b).eval()
print("{}\n".format(divide))

print("A^B:")
power = cntk.pow(a, b).eval()
print("{}\n".format(power))

print("Min(A,B):")
minimum = cntk.element_min(a, b).eval()
print("{}\n".format(minimum))

print("Max(A,B):")
maximum = cntk.element_max(a, b).eval()
print("{}\n".format(maximum))
Example #8
def gelu(x):
    # tanh approximation of the GELU activation (Hendrycks & Gimpel, 2016)
    return 0.5 * x * (
        1 + C.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * C.pow(x, 3))))
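A hedged sanity check in plain Python (the helper names are introduced here for illustration): the tanh formula above should track the exact GELU, 0.5 * x * (1 + erf(x / sqrt(2))), to within about 1e-3.

import math

def gelu_exact(v):
    return 0.5 * v * (1 + math.erf(v / math.sqrt(2)))

def gelu_tanh(v):
    return 0.5 * v * (1 + math.tanh(math.sqrt(2 / math.pi) * (v + 0.044715 * v ** 3)))

for v in (-2.0, -0.5, 0.0, 0.5, 2.0):
    print(v, gelu_exact(v), gelu_tanh(v))  # the two columns agree closely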