Exemplo n.º 1
0
def squared_error(output, target, name=''):
    '''
    Builds the squared-error criterion between ``output`` and ``target``: the
    squared element-wise differences of the two inputs are summed into a single
    value (a one by one matrix). It is frequently used as a training criterion.

    Example:
        >>> i1 = C.input_variable((1,2))
        >>> i2 = C.input_variable((1,2))
        >>> C.squared_error(i1,i2).eval({i1:np.asarray([[[2., 1.]]], dtype=np.float32), i2:np.asarray([[[4., 6.]]], dtype=np.float32)})
        array([ 29.], dtype=float32)

        >>> C.squared_error(i1,i2).eval({i1:np.asarray([[[1., 2.]]], dtype=np.float32), i2:np.asarray([[[1., 2.]]], dtype=np.float32)})
        array([ 0.], dtype=float32)

    Args:
        output: the output values from the network
        target: the values to compare against; usually a one-hot vector whose
         hot bit corresponds to the label index
        name (str, optional): the name of the Function instance in the network
    Returns:
        :class:`~cntk.ops.functions.Function`
    '''
    from cntk.cntk_py import squared_error
    # both operands are converted using one data type derived from the pair
    common_dtype = get_data_type(output, target)
    return squared_error(sanitize_input(output, common_dtype),
                         sanitize_input(target, common_dtype),
                         name)
Exemplo n.º 2
0
def cosine_distance(x, y, name=''):
    '''
    Builds the cosine distance op between ``x`` and ``y``. As the example
    below shows, operands pointing in the same direction yield 1, opposite
    directions yield -1 and orthogonal ones yield 0.

    Example:
        >>> a = np.asarray([-1, -1, -1, 1, 1, -1, 1, 1, -1, 1, 1, -1]).reshape(3,2,2)
        >>> b = np.asarray([1, 1, -1, 1, 1, -1, 1, -1, -1, -1, -1, 1]).reshape(3,2,2)
        >>> x = C.sequence.input_variable(shape=(2,))
        >>> y = C.sequence.input_variable(shape=(2,))
        >>> np.round(C.cosine_distance(x,y).eval({x:a,y:b}),5)
        array([[-1.,  1.],
               [ 1.,  0.],
               [ 0., -1.]], dtype=float32)

    Args:
        x: numpy array or any :class:`~cntk.ops.functions.Function` that outputs a tensor
        y: numpy array or any :class:`~cntk.ops.functions.Function` that outputs a tensor
        name (str, optional): the name of the Function instance in the network
    Returns:
        :class:`~cntk.ops.functions.Function`
    '''
    from cntk.cntk_py import cosine_distance
    # one data type is derived from both operands and applied to each of them
    common_dtype = get_data_type(x, y)
    return cosine_distance(sanitize_input(x, common_dtype),
                           sanitize_input(y, common_dtype),
                           name)
Exemplo n.º 3
0
    def unfold_from(initial_state, dynamic_axes_like):
        '''
        Builds the unfold recurrence: ``generator_function`` is applied to its
        own previous state, starting from ``initial_state``.

        ``length_increase``, ``generator_function`` and ``until_predicate`` are
        not parameters of this function; they are read from the enclosing scope
        (not visible here).

        Args:
            initial_state: value handed to ``sequence.delay`` as the state
             preceding the first step
            dynamic_axes_like: operand whose dynamic axes the generated state
             is reconciled with (a new axis is derived from it first when
             ``length_increase != 1``)

        Returns:
            the first output of ``generator_function`` wrapped in ``combine``;
            when ``until_predicate`` is given, that output filtered through
            ``sequence.gather`` with the ``valid_frames`` mask
        '''
        # create a new dynamic axis if a length increase is specified
        out_axis = dynamic_axes_like
        if length_increase != 1:
            factors = sequence.broadcast_as(length_increase, out_axis) # repeat each frame 'length_increase' times, on average
            out_axis = sequence.where(factors)  # note: values are irrelevant; only the newly created axis matters

        # the state is referenced before it exists, so a forward declaration
        # stands in for it until resolve_to() is called below
        state_fwd = ForwardDeclaration(name='unfold_state_fwd')
        prev_state = sequence.delay(state_fwd, initial_state=initial_state, name='unfold_prev_state')
        # TODO: must allow multiple variables, just like recurrence, as to allow beam decoding (permutation matrix)
        z = generator_function(prev_state) # returns either (output) or (output, new state)
        output = z.outputs[0]
        new_state = z.outputs[1] if len(z.outputs) > 1 else output # we allow generator to return a single value if it is identical to the new state
        # implant the dynamic axis (from dynamic_axes_like)
        from cntk.internal import sanitize_input, typemap
        new_state = typemap(reconcile_dynamic_axes)(sanitize_input(new_state), sanitize_input(out_axis))
        new_state = combine([new_state], name='unfold_new_state')
        # close the loop: the forward declaration now refers to the real state
        state_fwd.resolve_to(new_state)

        output = combine([output], name='unfold_output') # BUGBUG: without this, it crashes with bad weak ptr
        # BUGBUG: MUST do this after resolving the recurrence, otherwise also crashes

        # apply until_predicate if given
        if until_predicate is not None:
            # h starts at 1 and is multiplied at each step by (1 - predicate value of the previous frame).
            # NOTE(review): assuming the predicate yields 0/1 values, h drops to 0 on the frame after the
            # predicate first fires and stays 0 from then on -- confirm against until_predicate's contract.
            valid_frames = Recurrence(lambda h, x: (1-sequence.past_value(x)) * h, initial_state=1, name='valid_frames')(until_predicate(output))
            # keep only the frames selected by valid_frames
            output = sequence.gather(output, valid_frames, name='valid_output')

        return output
Exemplo n.º 4
0
def lattice_sequence_with_softmax(label, prediction, loglikelihood, lattice, symListPath, phonePath, stateListPath, transProbPath, latticeConfigPath="LatticeNode.config",
                                  hSmoothingWeight=0.95, frameDropThresh=1e-10, doReferenceAlign=False, seqGammarUsesMBR=False,
                                  seqGammarAMF=14.0, seqGammarLMF=14.0, seqGammarBMMIFactor=0.0, seqGammarWordPen=0.0, name=''):
    '''
    Thin wrapper around the native ``lattice_sequence_with_softmax`` op.

    The four operands ``label``, ``prediction``, ``loglikelihood`` and
    ``lattice`` are converted with one data type derived from all of them;
    every other argument is forwarded to the native op unchanged.

    NOTE(review): the meaning of the path arguments and of the ``seqGammar*``
    settings is defined by the native op and is not visible here -- consult
    the native documentation before relying on them.

    Args:
        label, prediction, loglikelihood, lattice: operands of the op
        symListPath, phonePath, stateListPath, transProbPath, latticeConfigPath:
         forwarded as given (presumably file paths -- TODO confirm)
        hSmoothingWeight, frameDropThresh, doReferenceAlign, seqGammarUsesMBR,
        seqGammarAMF, seqGammarLMF, seqGammarBMMIFactor, seqGammarWordPen:
         forwarded as given
        name (str, optional): the name of the Function instance in the network
    Returns:
        whatever the native ``lattice_sequence_with_softmax`` returns
    '''
    from cntk.cntk_py import lattice_sequence_with_softmax
    dtype = get_data_type(label, prediction, loglikelihood, lattice)
    # sanitize the operands in their original order
    label, prediction, loglikelihood, lattice = [
        sanitize_input(operand, dtype)
        for operand in (label, prediction, loglikelihood, lattice)
    ]
    return lattice_sequence_with_softmax(
        label, prediction, loglikelihood, lattice,
        symListPath, phonePath, stateListPath, transProbPath, latticeConfigPath,
        hSmoothingWeight, frameDropThresh, doReferenceAlign, seqGammarUsesMBR,
        seqGammarAMF, seqGammarLMF, seqGammarBMMIFactor, seqGammarWordPen, name)
Exemplo n.º 5
0
def future_value(x, initial_state=None, time_step=1, name=''):
    '''
    Returns the future value w.r.t. ``x``; most often used when building RNNs.
    The result has the same shape as the input but holds the next logical
    sample. ``time_step`` says how many steps to look ahead (1 by default).
    Where no future value exists (i.e. the current sample is the last one in
    the tensor) the ``initial_state`` value is returned instead.

    The initial state can be a constant (scalar or tensor), a learnable tensor
    or input data (which has a batch dimension, as needed for sequence-to-sequence models).

    Example:
        >>> x = C.sequence.input_variable(shape=(3,2))
        >>> # Create one sequence with 4 tensors of shape (3, 2)
        >>> x0 = np.reshape(np.arange(24,dtype=np.float32),(1,4,3,2))
        >>> y = C.sequence.future_value(x) # using initial state of 0 by default
        >>> y.eval({x:x0})
        [array([[[  6.,   7.],
                 [  8.,   9.],
                 [ 10.,  11.]],
        <BLANKLINE>
                [[ 12.,  13.],
                 [ 14.,  15.],
                 [ 16.,  17.]],
        <BLANKLINE>
                [[ 18.,  19.],
                 [ 20.,  21.],
                 [ 22.,  23.]],
        <BLANKLINE>
                [[  0.,   0.],
                 [  0.,   0.],
                 [  0.,   0.]]], dtype=float32)]

    Args:
        x: the tensor (or its name) from which the future value is obtained.
        initial_state: tensor or scalar used where the time-shifted input has
         no value; when omitted, a scalar constant 0 of ``x``'s data type is used.
        time_step (int): the number of time steps to look into the future (default 1)
        name (str, optional): the name of the Function instance in the network
    Returns:
        :class:`~cntk.ops.functions.Function`
    '''

    from cntk.internal import sanitize_dtype_cntk
    from ...cntk_py import Constant
    from cntk.cntk_py import future_value

    if initial_state is not None:
        initial_state = sanitize_input(initial_state)
    else:
        # nothing supplied: use a scalar zero in the data type of x
        # (x.dtype is read before x itself is sanitized)
        initial_state = Constant.scalar(sanitize_dtype_cntk(x.dtype), 0.0)

    return future_value(sanitize_input(x), initial_state, time_step, name)
Exemplo n.º 6
0
def scatter(seq, condition, new_sequence_axis_typeinfo=None, name=''):
    '''
    The inverse of gather. ``seq`` must contain exactly as many elements as
    there are True values in ``condition``. The result is a sequence as long
    as ``condition`` that is zero everywhere except at the positions where
    ``condition`` is True; those positions receive the elements of ``seq``,
    in their original order.

    Example:
        >>> x = C.sequence.input_variable(shape=(3,2))
        >>> t = C.sequence.last(x)
        >>> b = C.sequence.is_first(x)
        >>> y = C.sequence.scatter(t, b)
        >>> # create one sequence of 4 tensors each with shape (3,2)
        >>> x0 = np.reshape(np.arange(24.0,dtype=np.float32),(1,4,3,2))
        >>> y.eval({x:x0})
        [array([[[ 18.,  19.],
                 [ 20.,  21.],
                 [ 22.,  23.]],
        <BLANKLINE>
                [[  0.,   0.],
                 [  0.,   0.],
                 [  0.,   0.]],
        <BLANKLINE>
                [[  0.,   0.],
                 [  0.,   0.],
                 [  0.,   0.]],
        <BLANKLINE>
                [[  0.,   0.],
                 [  0.,   0.],
                 [  0.,   0.]]], dtype=float32)]

    Args:
        seq: the symbolic sequence from which elements will be copied in the
            output
        condition: the symbolic sequence which denotes the locations where
            elements should be copied
        new_sequence_axis_typeinfo:  tuple of integers indicating
            the scaling and additive factors for the length of the new sequence axis
            w.r.t. the condition sequence. This is used to determine the sequence axis
            to be used for the output of the scatter operation. If this argument is left
            unspecified a new independent sequence axis is created.
        name (str): the name of the node in the network
    Returns:
        :class:`~cntk.ops.functions.Function`
    '''
    from cntk.cntk_py import scatter
    # each operand is converted using its own data type
    seq_dtype = get_data_type(seq)
    seq = sanitize_input(seq, seq_dtype)
    condition_dtype = get_data_type(condition)
    condition = sanitize_input(condition, condition_dtype)
    # the axis type info is only forwarded when the caller supplied one
    if new_sequence_axis_typeinfo is not None:
        return scatter(seq, condition, new_sequence_axis_typeinfo, name)
    return scatter(seq, condition, name)
Exemplo n.º 7
0
def lambda_rank(output, gain, group, name=''):
    r'''
    Partitions the samples by ``group``, ranks the samples of each group by
    ``output`` and computes the Normalized Discounted Cumulative Gain
    (NDCG) at infinity per group. The Discounted Cumulative Gain (DCG) at
    infinity is defined as:

    :math:`\mathrm{DCG_{\infty}}()=\sum_{i=0}^{\infty} \frac{gain_{(i)}}{\log(i+2)}`

    where :math:`gain_{(i)}` means the gain of the :math:`i`-th ranked sample.

    The NDCG is the DCG divided by the maximum achievable DCG (the one obtained
    when the samples with the largest gain are ranked first).

    Samples in the same group must appear in order of decreasing gain.

    The returned value is 1 minus the average NDCG across all the groups in the
    minibatch, multiplied by 100 times the number of samples in the minibatch.

    In the backward direction it back-propagates LambdaRank gradients.

    Example:
        >>> group = C.input_variable((1,))
        >>> score = C.input_variable((1,), needs_gradient=True)
        >>> gain  = C.input_variable((1,))
        >>> g = np.array([1, 1, 2, 2], dtype=np.float32).reshape(4,1)
        >>> s = np.array([1, 2, 3, 4], dtype=np.float32).reshape(4,1)
        >>> n = np.array([7, 1, 3, 1], dtype=np.float32).reshape(4,1)
        >>> f = C.lambda_rank(score, gain, group)
        >>> np.round(f.grad({score:s, gain:n, group: g}, wrt=[score]),4)
        array([[-0.2121],
        <BLANKLINE>
               [ 0.2121],
        <BLANKLINE>
               [-0.1486],
        <BLANKLINE>
               [ 0.1486]], dtype=float32)

    Args:
        output: score of each sample
        gain: gain of each sample
        group: group of each sample
        name (str, optional): the name of the Function instance in the network
    Returns:
        :class:`~cntk.ops.functions.Function`
    '''
    from cntk.cntk_py import lambda_rank
    common_dtype = get_data_type(output, gain, group)
    # convert the three operands, in order, with the shared data type
    output, gain, group = [
        sanitize_input(operand, common_dtype)
        for operand in (output, gain, group)
    ]
    return lambda_rank(output, gain, group, name)
Exemplo n.º 8
0
def uniform_like(x, low=0.0, high=1.0, seed=auto_select, name=''):
    """uniform_like(x, low=0.0, high=1.0, seed=auto_select, name='')
    Draws samples from the uniform distribution over the interval [`low`,`high`).

    Args:
        x: cntk variable (input, output, parameter, or constant) from which to copy the shape, data type, and dynamic axes.
        low (float): lower end of the range of the random numbers
        high (float): upper end of the range of the random numbers
        seed (int): pseudo random number generator seed (default: automatically select a unique seed)
        name (str, optional): the name of the Function instance in the network

    Returns:
        :class:`~cntk.ops.functions.Function`

    Examples:
        >>> x = C.input_variable(4)
        >>> x0 = np.zeros((3,4), dtype=np.float32)
        >>> u = C.random.uniform_like(x, seed=98052)
        >>> u.eval({x:x0}, device=C.cpu()) # explicitly setting cpu because this is tested on multiple platforms; leave it unspecified in your code
        array([[ 0.931785,  0.814722,  0.479606,  0.937468],
               [ 0.004351,  0.185131,  0.00632 ,  0.118901],
               [ 0.710054,  0.304273,  0.043126,  0.987818]], dtype=float32)
    """
    from cntk.cntk_py import uniform_random_like
    # only the reference operand needs converting; the rest is passed through
    return uniform_random_like(sanitize_input(x), low, high, seed, name)
Exemplo n.º 9
0
    def convolution(cntk_layer, inputs):
        '''
         Setup convolution op with given parameters

        Args:
            cntk_layer (:class:`~cntk.contrib.crosstalkcaffe.unimodel.cntkmodel.CntkLayersDefinition`):
                the layer definition of convolution op
            inputs (list): a list contains all :class:`~cntk.ops.functions.Function` or
                :class:`~cntk.input`; only the first entry is used

        Return:
            :func:`~cntk.ops.functions.Function`: instanced cntk convolution op
        '''
        # Deliberately not named ``sanitize_input``: that name reads like the
        # ``internal.sanitize_input`` helper rather than its result.
        conv_input = internal.sanitize_input(inputs[0])
        params = cntk_layer.parameters
        output_channel = params.output
        kernel_size = params.kernel
        # kernel layout: (output channels, input.shape[0] / group) + spatial kernel size
        # NOTE(review): treats the first static axis as the channel axis -- confirm
        kernel_shape = (output_channel, int(conv_input.shape[0] / params.group)) + tuple(kernel_size)
        kernel_init = None
        if cntk_layer.parameter_tensor:
            kernel_data_tensor = cntk_layer.parameter_tensor[0]
            kernel_init = np.asarray(kernel_data_tensor.data, dtype=np.float32)
            # The shape is passed positionally: the ``newshape`` keyword of
            # np.reshape is deprecated in recent NumPy releases, while the
            # positional form works on every version.
            kernel_init = np.reshape(kernel_init, kernel_shape)
        bias_shape = (output_channel, ) + (1,) * 2
        bias_init = None
        # a bias initializer is only built when the layer wants a bias AND
        # tensors were supplied; otherwise bias_init stays None
        if params.need_bias:
            if cntk_layer.parameter_tensor:
                bias_data_tensor = cntk_layer.parameter_tensor[1]
                bias_init = np.asarray(bias_data_tensor.data, dtype=np.float32)
                bias_init = np.reshape(bias_init, bias_shape)
        return BlockApiSetup.convolution(output_channel, kernel_size, stride=params.stride, pad=params.auto_pad,
                                         kernel_init=kernel_init, bias_init=bias_init,
                                         group=params.group, dilation=params.dilation,
                                         name=cntk_layer.op_name)(conv_input)
Exemplo n.º 10
0
def normal_like(x, mean=0.0, scale=1.0, seed=auto_select, name=''):
    """normal_like(x, mean=0.0, scale=1.0, seed=auto_select, name='')
    Draws samples from the normal distribution with mean `mean` and standard deviation `scale`.

    Args:
        x: cntk variable (input, output, parameter, or constant) from which to copy the shape, data type, and dynamic axes.
        mean (float): mean of the distribution
        scale (float): scale (standard deviation) of the distribution
        seed (int): pseudo random number generator seed (default: automatically select a unique seed)
        name (str, optional): the name of the Function instance in the network

    Returns:
        :class:`~cntk.ops.functions.Function`

    Examples:
        >>> x = C.parameter((2,3,4))
        >>> z = C.random.normal_like(x, seed=98052)
        >>> z.eval(device=C.cpu()) # explicitly setting cpu because this is tested on multiple platforms; leave it unspecified in your code
        array([[[ 1.803254,  0.995395, -0.631974, -1.73672 ],
                [ 0.005615, -0.340025, -0.011913, -0.236371],
                [-1.207685, -0.495846,  0.037022, -1.220596]],
        <BLANKLINE>
               [[ 0.872981,  0.654405, -0.111421, -0.544074],
                [ 1.543746, -0.63555 , -1.072869, -0.379701],
                [ 0.592069, -1.035192,  1.679303, -0.391963]]], dtype=float32)
    """
    from cntk.cntk_py import normal_random_like
    # only the reference operand needs converting; the rest is passed through
    return normal_random_like(sanitize_input(x), mean, scale, seed, name)
Exemplo n.º 11
0
def unpack(x, padding_value, no_mask_output=False, name=''):
    '''
    Unpacks the sequence operand 'x' along the most significant static axis
    [-1], filling any gaps with 'padding_value'. When 'no_mask_output' is
    False the returned Function has 2 outputs: the unpacked non-sequence data,
    and a mask marking the gaps in the unpacked output that result from the
    sequences in the operand having different lengths.

    Args:
        x: the sequence tensor (or its name) which is unpacked
        padding_value (np.float32 or np.float64): The value to pad gaps in the unpacked tensor with.
        no_mask_output (bool, optional): whether the Function has a mask tensor output denoting the
            gaps in the unpacked output due to differences across lengths of the sequences in the operand.
        name (str, optional): the name of the Function instance in the network

    Returns:
        :class:`~cntk.ops.functions.Function`

    Todo:
        add an example
    '''

    from cntk.cntk_py import unpack

    # only the sequence operand is converted; the other arguments pass through
    return unpack(sanitize_input(x), padding_value, no_mask_output, name)
Exemplo n.º 12
0
def gumbel_like(x, loc=0.0, scale=1.0, seed=auto_select, name=''):
    """gumbel_like(x, loc=0.0, scale=1.0, seed=auto_select, name='')
    Draws samples from the Gumbel distribution with location `loc` and scale `scale`.

    Args:
        x: cntk variable (input, output, parameter, or constant) from which to copy the shape, data type, and dynamic axes.
        loc (float): location of the distribution
        scale (float): scale of the distribution
        seed (int): pseudo random number generator seed (default: automatically select a unique seed)
        name (str, optional): the name of the Function instance in the network

    Returns:
        :class:`~cntk.ops.functions.Function`

    Examples:
        >>> x = C.constant(np.zeros((2,3,4), dtype=np.float32))
        >>> g = C.random.gumbel_like(x, seed=98052)
        >>> s = g.eval(device=C.cpu()) # explicitly setting cpu because this is tested on multiple platforms; leave it unspecified in your code
        >>> np.round(s, 4)
        array([[[-0.9877, -0.5223,  0.4259, -1.0196],
                [ 5.4352,  1.5861,  5.0608,  2.0668],
                [-0.2135,  1.0139,  3.1217, -1.4834]],
        <BLANKLINE>
               [[ 0.4507,  0.6325,  2.1682,  0.4463],
                [-0.6583,  0.1147, -0.3144, -0.7925],
                [ 1.9773, -0.3627, -0.4566, -0.2368]]], dtype=float32)

    See also:
        `The Gumbel-Max Trick
        <https://hips.seas.harvard.edu/blog/2013/04/06/the-gumbel-max-trick-for-discrete-distributions/>`_.
    """
    from cntk.cntk_py import gumbel_random_like
    # only the reference operand needs converting; the rest is passed through
    return gumbel_random_like(sanitize_input(x), loc, scale, seed, name)
Exemplo n.º 13
0
def bernoulli_like(x, mean=0.5, seed=auto_select, name=''):
    """bernoulli_like(x, mean=0.5, seed=auto_select, name='')
    Draws samples from the Bernoulli distribution with success probability `mean`.

    Args:
        x: cntk variable (input, output, parameter, or constant) from which to copy the shape, data type, and dynamic axes.
        mean (float): success probability
        seed (int): pseudo random number generator seed (default: automatically select a unique seed)
        name (str, optional): the name of the Function instance in the network

    Returns:
        :class:`~cntk.ops.functions.Function`

    Examples:
        >>> p = C.placeholder()
        >>> bp = C.random.bernoulli_like(p, seed=98052)
        >>> x = C.sequence.input_variable(1)
        >>> bx = bp.replace_placeholders({p:x})
        >>> x0 = np.zeros((1,3,1), dtype=np.float32)
        >>> bx.eval({x:x0}, device=C.cpu()) # explicitly setting cpu because this is tested on multiple platforms; leave it unspecified in your code
        [array([[ 1.],
               [ 1.],
               [ 0.]], dtype=float32)]
    """
    from cntk.cntk_py import bernoulli_random_like
    # only the reference operand needs converting; the rest is passed through
    return bernoulli_random_like(sanitize_input(x), mean, seed, name)
Exemplo n.º 14
0
    def dense(cntk_layer, inputs):
        '''
         Setup dense op with given parameters

        Args:
            cntk_layer (:class:`~cntk.contrib.crosstalkcaffe.unimodel.cntkmodel.CntkLayersDefinition`):
                the layer definition of dense op
            inputs (list): a list contains all :class:`~cntk.ops.functions.Function` or
                :class:`~cntk.input`; only the first entry is used

        Return:
            :func:`~cntk.ops.functions.Function`: instanced cntk dense op

        Raises:
            AssertionError: if parameter tensors are supplied but there are
                not exactly two of them (scale and bias)
        '''
        # Deliberately not named ``sanitize_input``: that name reads like the
        # ``internal.sanitize_input`` helper rather than its result.
        dense_input = internal.sanitize_input(inputs[0])
        input_channel = dense_input.shape
        output_channel = cntk_layer.parameters.num_output

        # total number of input elements; the initial value 1 keeps this
        # defined for an empty (scalar) shape
        flattened_channel = reduce(mul, list(input_channel), 1)
        scale_shape = input_channel + (output_channel, )
        bias_shape = (output_channel, )

        # Default both initializers so the call below never hits an unbound
        # local when the layer carries no parameter tensors (convolution in
        # this module uses the same None default).
        # NOTE(review): assumes BlockApiSetup.linear accepts None initializers -- confirm.
        scale_init = None
        bias_init = None
        if cntk_layer.parameter_tensor:
            if len(cntk_layer.parameter_tensor) != 2:
                raise AssertionError('dense layer layer receives two inputs (scale/bias)')
            scale_tensor = cntk_layer.parameter_tensor[0]
            bias_tensor = cntk_layer.parameter_tensor[1]
            scale_init = np.asarray(scale_tensor.data, np.float32)
            if cntk_layer.parameters.transpose:
                # stored as (output, flattened input): transpose to
                # (flattened input, output) before restoring the full shape
                scale_init = np.reshape(scale_init, (output_channel, flattened_channel))
                scale_init = np.transpose(scale_init).copy()
                scale_init = np.reshape(scale_init, scale_shape)
            else:
                scale_init = np.reshape(scale_init, scale_shape)
            bias_init = np.asarray(bias_tensor.data, np.float32)
        return BlockApiSetup.linear(bias_shape, scale_shape, scale_init, bias_init, cntk_layer.op_name)(dense_input)
Exemplo n.º 15
0
def edit_distance_error(input_a, input_b, subPen=1, delPen=1, insPen=1, squashInputs=False, tokensToIgnore=None, name=''):
    '''
    Edit distance error evaluation function with the option of specifying penalty of substitution, deletion and insertion, as well as squashing the input sequences and ignoring certain samples.
    Using the classic DP algorithm as described in https://en.wikipedia.org/wiki/Edit_distance, adjusted to take into account the penalties.

    Each sequence in the inputs is expected to be a matrix. Prior to computation of the edit distance, the operation extracts the indices of maximum element in each column.
    For example, a sequence matrix

    1 2 9 1

    3 0 3 2

    will be represented as the vector of labels (indices) as [1, 0, 0, 1], on which edit distance will be actually evaluated.

    The function allows to squash sequences of repeating labels and ignore certain labels. For example, if squashInputs is true and tokensToIgnore contains index of label '-' then
    given first input sequence as s1="1-12-" and second as s2="-11--122" the edit distance will be computed against s1' = "112" and s2' = "112".

    When used as an evaluation criterion, the Trainer will aggregate all values over an epoch and report the average, i.e. the error rate.
    Primary objective of this node is for error evaluation of CTC training, see formula (1) in "Connectionist Temporal Classification: Labelling Unsegmented
    Sequence Data with Recurrent Neural Networks", ftp://ftp.idsia.ch/pub/juergen/icml2006.pdf

    Example:
        >>> i1 = C.input(shape=(2,))
        >>> i2 = C.input(shape=(2,))
        >>> arguments = {i1 : [[1, 3], [2, 0]], i2 : [[2, 0], [2, 0]]}
        >>> a = C.edit_distance_error(i1, i2, 0, 1, 1, True, [1])
        >>> a.eval(arguments)
        array(1.0, dtype=float32)

    Args:
        input_a: first input sequence
        input_b: second input sequence
        subPen: substitution penalty
        delPen: deletion penalty
        insPen: insertion penalty
        squashInputs: whether to merge sequences of identical samples (in both input sequences). If true and tokensToIgnore contains label '-' then
                given first input sequence as s1="a-ab-" and second as s2="-aa--abb" the edit distance will be computed against s1' = "aab" and s2' = "aab".
        tokensToIgnore: list of indices of samples to ignore during edit distance evaluation (in both sequences);
                ``None`` (the default) means an empty list, i.e. nothing is ignored
        name (str, optional): the name of the Function instance in the network
    Returns:
        :class:`~cntk.ops.functions.Function`
    '''
    from cntk.cntk_py import edit_distance_error
    # A None sentinel replaces the former mutable ``[]`` default, so no list
    # object is shared between calls; a fresh empty list is built per call.
    if tokensToIgnore is None:
        tokensToIgnore = []
    dtype = get_data_type(input_a, input_b)
    input_a = sanitize_input(input_a, dtype)
    input_b = sanitize_input(input_b, dtype)
    return edit_distance_error(input_a, input_b, subPen, delPen, insPen, squashInputs, tokensToIgnore, name)
Exemplo n.º 16
0
def test_sanitize_input(data, dtype):
    # sanitize_input must keep the values, apply the requested dtype and
    # report the same shape the data has as a numpy array
    sanitized = sanitize_input(data, dtype)
    assert np.allclose(sanitized.value, data)
    assert sanitized.dtype == dtype
    if isinstance(data, np.ndarray):
        expected_shape = data.shape
    else:
        expected_shape = np.asarray(data).shape
    assert sanitized.shape == expected_shape
Exemplo n.º 17
0
def broadcast_as(operand, broadcast_as_operand, name=''):
    '''
    Turns a non-sequence into a sequence: the ``operand`` is given dynamic
    axes of the same type as those of ``broadcast_as_operand`` and its value
    is broadcast along those dynamic axes.

    Example:
        >>> x = C.sequence.input_variable(shape=(3,2))
        >>> t = C.sequence.last(x)
        >>> b = C.sequence.is_first(x)
        >>> y = C.sequence.broadcast_as(t, b)
        >>> # create one sequence of 4 tensors each with shape (3,2)
        >>> x0 = np.reshape(np.arange(24.0,dtype=np.float32),(1,4,3,2))
        >>> y.eval({x:x0})
        [array([[[ 18.,  19.],
                 [ 20.,  21.],
                 [ 22.,  23.]],
        <BLANKLINE>
                [[ 18.,  19.],
                 [ 20.,  21.],
                 [ 22.,  23.]],
        <BLANKLINE>
                [[ 18.,  19.],
                 [ 20.,  21.],
                 [ 22.,  23.]],
        <BLANKLINE>
                [[ 18.,  19.],
                 [ 20.,  21.],
                 [ 22.,  23.]]], dtype=float32)]

    Args:
        operand: the symbolic tensor whose value will be broadcast
        broadcast_as_operand: the symbolic tensor whose dynamic axes will
            be used to broadcast the operand
        name (str): the name of the node in the network

    Returns:
        :class:`~cntk.ops.functions.Function`
    '''
    from cntk.cntk_py import broadcast_as
    # the operand's data type is derived from both arguments ...
    operand_dtype = get_data_type(operand, broadcast_as_operand)
    operand = sanitize_input(operand, operand_dtype)
    # ... while the axes source is converted using its own data type only
    axes_source_dtype = get_data_type(broadcast_as_operand)
    broadcast_as_operand = sanitize_input(broadcast_as_operand, axes_source_dtype)
    return broadcast_as(operand, broadcast_as_operand, name)
Exemplo n.º 18
0
def binary_cross_entropy(output, target, name=''):
    r'''
    Builds the binary cross entropy (aka logistic loss) between ``output`` and ``target``.

    Example:
        TBA

    Args:
        output: the computed posterior probability for a variable to be 1 from the network (typ. a ``sigmoid``)
        target: ground-truth label, 0 or 1
        name (str, optional): the name of the Function instance in the network
    Returns:
        :class:`~cntk.ops.functions.Function`
    '''
    from cntk.cntk_py import binary_cross_entropy
    # both operands are converted using one data type derived from the pair
    common_dtype = get_data_type(output, target)
    return binary_cross_entropy(sanitize_input(output, common_dtype),
                                sanitize_input(target, common_dtype),
                                name)
Exemplo n.º 19
0
def gather(seq, condition, new_sequence_axis_typeinfo=None, name=''):
    '''
    Given two sequences of the same length, returns a new sequence made of
    those elements of ``seq`` whose corresponding element in ``condition``
    is True. The ordering of ``seq`` is preserved.

    This operation is also known as stream compaction, or copy_if.

    Example:
        >>> x = C.sequence.input_variable(shape=(3,2))
        >>> z = C.greater(C.reduce_sum(x),60)
        >>> y = C.sequence.gather(x,z)
        >>> # create one sequence of 4 tensors each with shape (3,2)
        >>> x0 = np.reshape(np.arange(24.0,dtype=np.float32),(1,4,3,2))
        >>> y.eval({x:x0})
        [array([[[ 12.,  13.],
                 [ 14.,  15.],
                 [ 16.,  17.]],
        <BLANKLINE>
                [[ 18.,  19.],
                 [ 20.,  21.],
                 [ 22.,  23.]]], dtype=float32)]

    Args:
        seq: the symbolic sequence from which elements will be selected
        condition: the symbolic sequence of booleans which indicate which
            elements should be selected
        new_sequence_axis_typeinfo:  tuple of integers indicating
            the scaling and additive factors for the length of the new sequence axis
            w.r.t. the operand sequence. This is used to determine the sequence axis
            to be used for the output of the gather operation. If this argument is left
            unspecified, a new independent sequence axis is created.
        name (str): the name of the node in the network
    Returns:
        :class:`~cntk.ops.functions.Function`
    '''
    from cntk.cntk_py import gather
    # each operand is converted using its own data type
    seq_dtype = get_data_type(seq)
    seq = sanitize_input(seq, seq_dtype)
    condition_dtype = get_data_type(condition)
    condition = sanitize_input(condition, condition_dtype)
    # the axis type info is only forwarded when the caller supplied one
    if new_sequence_axis_typeinfo is not None:
        return gather(seq, condition, new_sequence_axis_typeinfo, name)
    return gather(seq, condition, name)
Exemplo n.º 20
0
def ndcg_at_1(output, gain, group, name=''):
    r'''
    Partitions the samples by ``group``, ranks the samples of each group by
    ``output`` and computes the Normalized Discounted Cumulative Gain
    (NDCG) at 1 per group. The NDCG at 1 is defined as:

    :math:`\mathrm{NDCG_1} = \frac{gain_{(1)}}{\max_i gain_i}`

    where :math:`gain_{(1)}` means the gain of the first ranked sample.

    Samples in the same group must appear in order of decreasing gain.

    The returned value is the average NDCG at 1 across all the groups in the
    minibatch, multiplied by 100 times the number of samples in the minibatch.

    This is a forward-only operation, there is no gradient for it.

    Example:
        >>> group = C.input_variable((1,))
        >>> score = C.input_variable((1,))
        >>> gain  = C.input_variable((1,))
        >>> g = np.array([1, 1, 2, 2], dtype=np.float32).reshape(4,1,1)
        >>> s = np.array([2, 1, 3, 1], dtype=np.float32).reshape(4,1,1)
        >>> n = np.array([7, 1, 3, 1], dtype=np.float32).reshape(4,1,1)
        >>> C.ndcg_at_1(score, gain, group).eval({score:s, gain:n, group: g})
        array(400.0, dtype=float32)

    Args:
        output: score of each sample
        gain: gain of each sample
        group: group of each sample
        name (str, optional): the name of the Function instance in the network
    Returns:
        :class:`~cntk.ops.functions.Function`
    '''
    from cntk.cntk_py import ndcg_at_1
    common_dtype = get_data_type(output, gain, group)
    # convert the three operands, in order, with the shared data type
    output, gain, group = [
        sanitize_input(operand, common_dtype)
        for operand in (output, gain, group)
    ]
    return ndcg_at_1(output, gain, group, name)
Exemplo n.º 21
0
def cosine_distance_with_negative_samples(x, y, shift, num_negative_samples, name=''):
    '''

    Given minibatches for ``x`` and ``y``, computes for each element of ``x`` the cosine distance to the
    corresponding ``y`` and, in addition, the cosine distance between ``x`` and some other elements of ``y``
    (referred to as negative samples). The ``x`` and ``y`` pairs are often derived from embeddings of
    textual data, although any numeric encoding can be used.
    When the function is used for textual similarity, ``x`` represents a search query term embedding
    and ``y`` represents a document embedding. Negative samples are formed on the fly by shifting
    the right side (``y``). ``shift`` states by how many samples ``y`` is shifted when
    forming each negative sample pair; it is often chosen to be 1. ``num_negative_samples``
    states how many negative samples are generated.

    Example:
        >>> qry = np.asarray([1., 1., 0., 0., 0., 1., 1., 0., 0., 0., 1., 1.], dtype=np.float32).reshape(3, 1, 4)
        >>> doc = np.asarray([1., 1., 0., 0., 0., 1., 1., 0., 0., 0., 1., 1.], dtype=np.float32).reshape(3, 1, 4)
        >>> x = C.sequence.input_variable(shape=(4,))
        >>> y = C.sequence.input_variable(shape=(4,))
        >>> model = C.cosine_distance_with_negative_samples(x, y, shift=1, num_negative_samples=2)
        >>> np.round(model.eval({x: qry, y: doc}), decimals=4)
        array([[[ 1. ,  0.5,  0. ]],
        <BLANKLINE>
               [[ 1. ,  0.5,  0.5]],
        <BLANKLINE>
               [[ 1. ,  0. ,  0.5]]], dtype=float32)

    Args:
        x: numpy array or any :class:`~cntk.ops.functions.Function` that outputs a tensor
        y: numpy array or any :class:`~cntk.ops.functions.Function` that outputs a tensor
        shift: non-zero positive integer representing number of shift to generate a negative sample
        num_negative_samples: number of negative samples to generate, a non-zero positive integer
        name (str, optional): the name of the Function instance in the network
    Returns:
        :class:`~cntk.ops.functions.Function`
    '''
    # Alias the native op so it does not shadow this wrapper's own name.
    from cntk.cntk_py import cosine_distance_with_negative_samples as native_op
    # Both operands are converted using one shared data type.
    common_dtype = get_data_type(x, y)
    x, y = sanitize_input(x, common_dtype), sanitize_input(y, common_dtype)
    # shift and num_negative_samples are forwarded untouched.
    return native_op(x, y, shift, num_negative_samples, name)
Exemplo n.º 22
0
def weighted_binary_cross_entropy(output, target, weight, name=''):
    r'''
    Computes the weighted binary cross entropy (aka logistic loss) between
    ``output`` and ``target``.

    Example:
        TBA

    Args:
        output: the computed posterior probability from the network
        target: ground-truth label, 0 or 1
        weight: weight of each example
        name (str, optional): the name of the Function instance in the network
    Returns:
        :class:`~cntk.ops.functions.Function`
    '''
    # Alias the native op so it does not shadow this wrapper's own name.
    from cntk.cntk_py import weighted_binary_cross_entropy as native_op
    # All three operands are converted using one shared data type.
    common_dtype = get_data_type(output, target, weight)
    output, target, weight = [sanitize_input(operand, common_dtype)
                              for operand in (output, target, weight)]
    return native_op(output, target, weight, name)
Exemplo n.º 23
0
def cross_entropy_with_softmax(output_vector, target_vector, axis=-1, name=''):
    r'''
    This operation computes the cross entropy between the ``target_vector`` and
    the softmax of the ``output_vector``. The elements of ``target_vector``
    have to be non-negative and should sum to 1. The ``output_vector`` can
    contain any values. The function will internally compute the softmax of
    the ``output_vector``. Concretely, for an input with :math:`k` components,

    :math:`\mathrm{softmax}(x)=\left[\frac{\exp(x_1)}{\sum_i\exp(x_i)}\quad\frac{\exp(x_2)}{\sum_i\exp(x_i)}\quad\ldots\quad\frac{\exp(x_k)}{\sum_i\exp(x_i)}\right]`

    :math:`\mathrm{cross\_entropy\_with\_softmax}(o, t) = -\sum_{i} t_i \log(\mathrm{softmax}(o)_i)`

    with the understanding that the implementation can use equivalent formulas
    for efficiency and numerical stability.

    Example:
        >>> C.cross_entropy_with_softmax([[1., 1., 1., 50.]], [[0., 0., 0., 1.]]).eval()
        array([[ 0.]], dtype=float32)

        >>> C.cross_entropy_with_softmax([[1., 2., 3., 4.]], [[0.35, 0.15, 0.05, 0.45]]).eval()
        array([[ 1.84019]], dtype=float32)

    Args:
        output_vector: the unscaled computed output values from the network
        target_vector: usually it is one-hot vector where the hot bit
         corresponds to the label index. But it can be any probability
         distribution over the labels.
        axis (int or :class:`~cntk.axis.Axis`, optional): if given, cross entropy will be computed
                along this axis
        name (str, optional): the name of the Function instance in the network
    Returns:
        :class:`~cntk.ops.functions.Function`
    '''
    from cntk.cntk_py import cross_entropy_with_softmax
    # Both operands are converted using one shared data type.
    dtype = get_data_type(output_vector, target_vector)
    output_vector = sanitize_input(output_vector, dtype)
    target_vector = sanitize_input(target_vector, dtype)
    # The axis is normalized by sanitize_axis before reaching the native op.
    axis = sanitize_axis(axis)
    return cross_entropy_with_softmax(output_vector, target_vector, axis, name)
Exemplo n.º 24
0
def softmax(seq, name=''):
    '''
    Applies softmax to the input along the sequence axis.

    Args:
        seq: sequence input tensor
        name (`str`, optional): the name of the Function instance in the network

    Returns:
        :class:`~cntk.ops.functions.Function`
    '''
    from cntk.cntk_py import sequence_softmax as native_sequence_softmax
    # Sanitize the operand with its own data type and hand it straight over.
    return native_sequence_softmax(sanitize_input(seq, get_data_type(seq)), name)
Exemplo n.º 25
0
def classification_error(output_vector, target_vector, axis=-1, topN=1, name=''):
    '''
    Computes the classification error. The index of the highest value in
    ``output_vector`` is compared with the actual ground truth label (the
    index of the hot bit in ``target_vector``). The result is a scalar
    (i.e., one by one matrix). It is often used as an evaluation criterion,
    but cannot serve as a training criterion because no gradient is defined
    for it.

    Example:
        >>> C.classification_error([[1., 2., 3., 4.]], [[0., 0., 0., 1.]]).eval()
        array([[ 0.]], dtype=float32)

        >>> C.classification_error([[1., 2., 3., 4.]], [[0., 0., 1., 0.]]).eval()
        array([[ 1.]], dtype=float32)

        >>> # Note that non-1 values are treated as 0
        >>> C.classification_error([[1., 2., 3., 4.]], [[5., 0., 1., 0.]]).eval()
        array([[ 1.]], dtype=float32)

    Args:
        output_vector: the output values from the network
        target_vector: it is one-hot vector where the hot bit corresponds to
         the label index.
        axis (int or :class:`~cntk.axis.Axis`): axis along which the
         classification error will be computed.
        topN (int, optional): forwarded unchanged to the native op; defaults
         to 1. Presumably the number of top-ranked predictions that count as
         a hit -- TODO confirm against the CNTK documentation.
        name (str, optional): the name of the Function instance in the network
    Returns:
        :class:`~cntk.ops.functions.Function`
    '''
    # Alias the native op so it does not shadow this wrapper's own name.
    from cntk.cntk_py import classification_error as native_op
    common_dtype = get_data_type(output_vector, target_vector)
    predictions = sanitize_input(output_vector, common_dtype)
    labels = sanitize_input(target_vector, common_dtype)
    # Note the native argument order: topN precedes axis.
    return native_op(predictions, labels, topN, sanitize_axis(axis), name)
Exemplo n.º 26
0
    def relu(cntk_layer, inputs):
        '''
        Build the CNTK ReLU op described by ``cntk_layer``.

        Only the first element of ``inputs`` is used as the operand.

        Args:
            cntk_layer (:class:`~cntk.contrib.crosstalkcaffe.unimodel.cntkmodel.CntkLayersDefinition`):
                the layer definition of ReLU op; its ``op_name`` names the op
            inputs (list): a list contains all :class:`~cntk.ops.functions.Function` or
                :class:`~cntk.input`

        Return:
            :func:`~cntk.ops.functions.Function`: instantiated cntk ReLU op
        '''
        operand = internal.sanitize_input(inputs[0])
        return ops.relu(operand, name=cntk_layer.op_name)
Exemplo n.º 27
0
    def batch_norm(cntk_layer, inputs):
        '''
        Build the CNTK batch normalization op described by ``cntk_layer``.

        Only the first element of ``inputs`` is used as the operand. When
        ``cntk_layer.parameter_tensor`` is non-empty it is read positionally:
        index 0 is the saved mean, index 1 the saved variance and index 2 a
        global scale whose first element divides both statistics (a zero
        global scale zeroes them instead). Only when exactly five tensors are
        present are index 3 and index 4 used as the learned scale and bias.

        Args:
            cntk_layer (:class:`~cntk.contrib.crosstalkcaffe.unimodel.cntkmodel.CntkLayersDefinition`):
                the layer definition of batch normalization op
            inputs (list): a list contains all :class:`~cntk.ops.functions.Function` or
                :class:`~cntk.input`

        Return:
            :func:`~cntk.ops.functions.Function`: instantiated cntk batch normalization op

        Raises:
            AssertionError: if tensors are supplied but fewer than three
        '''
        operand = internal.sanitize_input(inputs[0])
        # Every parameter is a vector sized by the operand's leading dimension.
        stat_shape = (operand.shape[0], )

        # Initial values used when the layer carries no tensors.
        # NOTE(review): mean=1 / var=0 is an unusual default -- confirm intended.
        inits = {'scale': 1, 'bias': 0, 'mean': 1, 'var': 0}

        tensors = cntk_layer.parameter_tensor
        if tensors:
            if len(tensors) < 3:
                raise AssertionError('At least three tensors (saved_mean, saved_variance and scale) are needed')
            global_scale = tensors[2].data[0]
            # Guard against division by zero: a zero scale nulls the statistics.
            if global_scale != 0:
                factor = 1 / global_scale
            else:
                factor = 0
            inits['mean'] = np.asarray(tensors[0].data, dtype=np.float32) * factor
            inits['var'] = np.asarray(tensors[1].data, dtype=np.float32) * factor
            if len(tensors) == 5:
                inits['scale'] = np.asarray(tensors[3].data, dtype=np.float32)
                inits['bias'] = np.asarray(tensors[4].data, dtype=np.float32)

        # Parameters are created in the order scale, bias, mean, var and are
        # named '<op_name>.<suffix>'.
        scale, bias, mean, var = [
            ops.parameter(stat_shape, init=inits[suffix],
                          name='.'.join((cntk_layer.op_name, suffix)))
            for suffix in ('scale', 'bias', 'mean', 'var')]

        # The positional True is presumably the ``spatial`` flag of
        # ops.batch_normalization -- TODO confirm against its signature.
        return ops.batch_normalization(operand, scale, bias, mean, var, True,
                                       use_cudnn_engine=False,
                                       epsilon=cntk_layer.parameters.epsilon,
                                       running_count=ops.constant(0),
                                       name=cntk_layer.op_name)
Exemplo n.º 28
0
    def pooling(cntk_layer, inputs):
        '''
        Build the CNTK pooling op described by ``cntk_layer``.

        Only the first element of ``inputs`` is used as the operand. A truthy
        ``cntk_layer.parameters.pooling_type`` selects average pooling,
        otherwise max pooling is used. The output dimension is always
        computed with ``ceil_out_dim=True``.

        Args:
            cntk_layer (:class:`~cntk.contrib.crosstalkcaffe.unimodel.cntkmodel.CntkLayersDefinition`):
                the layer definition of pooling op
            inputs (list): a list contains all :class:`~cntk.ops.functions.Function` or
                :class:`~cntk.input`

        Return:
            :func:`~cntk.ops.functions.Function`: instantiated cntk pooling op
        '''
        operand = internal.sanitize_input(inputs[0])
        settings = cntk_layer.parameters
        if settings.pooling_type:
            kind = ops.PoolingType_Average
        else:
            kind = ops.PoolingType_Max
        # kernel and stride are converted to tuples; auto_pad is wrapped in a
        # one-element list.
        return ops.pooling(operand, kind, tuple(settings.kernel),
                           strides=tuple(settings.stride),
                           auto_padding=[settings.auto_pad],
                           ceil_out_dim=True,
                           name=cntk_layer.op_name)
Exemplo n.º 29
0
def where(condition, name=''):
    '''
    Given a symbolic sequence ``condition`` of boolean-like (1/0) values,
    returns a new sequence holding the indices at which the values were true.

    A value of ``condition`` other than 0 or 1 denotes a repeat factor.
    A fractional repeat factor is rounded up, and the overshoot is deducted
    from the next repeat factor.

    Example:
        >>> x = C.sequence.input_variable(shape=(3,2))
        >>> z = C.greater(C.reduce_sum(x), 60)
        >>> # create one sequence of 4 tensors each with shape (3,2)
        >>> x0 = np.reshape(np.arange(24.0, dtype=np.float32), (1,4,3,2))
        >>> z.eval({x:x0})
        [array([ 0.,  0.,  1.,  1.], dtype=float32)]
        >>> y = C.sequence.where(z)
        >>> y.eval({x:x0})
        [array([ 2.,  3.], dtype=float32)]

        >>> # repeat frame[1] twice, frame[3] three times, and frame[4] twice
        >>> C.sequence.where(C.sequence.input_variable(1)).eval([[[1], [2], [1], [3], [2]]])
        [array([ 0.,  1.,  1.,  2.,  3.,  3.,  3.,  4.,  4.], dtype=float32)]
        >>> # note that the above are the indices that are passed to

        >>> # repeat frames with a fractional factor
        >>> C.sequence.where(C.sequence.input_variable(1)).eval([[[1.2]]*10])
        [array([ 0.,  0.,  1.,  2.,  3.,  4.,  5.,  5.,  6.,  7.,  8.,  9.],
            dtype=float32)]
        >>> # as a result, a 1.2 times stretch is realized by duplicating frame[0] and frame[5]

    Args:
        condition: sequence of 0 or 1 values for filtering, or other positive values for repetition (also fractional)
        name (str): the name of the node in the network

    Returns:
        :class:`~cntk.ops.functions.Function`
    '''
    # Alias the native op so it does not shadow this wrapper's own name.
    from cntk.cntk_py import where as native_where
    # Sanitize the operand with its own data type and hand it straight over.
    return native_where(sanitize_input(condition, get_data_type(condition)), name)
Exemplo n.º 30
0
def slice(seq, begin_index, end_index, name=''):
    '''
    Slices the input sequence along its sequence axis.

    Args:
        seq: sequence input tensor
        begin_index (`int`): the index along sequence axis where the slicing starts
        end_index (`int`): the index along sequence axis where the slicing ends
        name (`str`, optional): the name of the Function instance in the network

    See also:
        Indexing in NumPy: https://docs.scipy.org/doc/numpy/reference/arrays.indexing.html

    Returns:
        :class:`~cntk.ops.functions.Function`

    Todo:
        add an example
    '''
    from cntk.cntk_py import sequence_slice as native_sequence_slice
    # Only the sequence operand is sanitized; both indices pass through as given.
    operand = sanitize_input(seq, get_data_type(seq))
    return native_sequence_slice(operand, begin_index, end_index, name)
Exemplo n.º 31
0
def is_last(seq, name=''):
    '''
    Returns a symbolic sequence of booleans that has the same length as
    ``seq``: its last element is 1 and every other element is 0.

    Example:
        >>> x = C.sequence.input_variable(shape=(3,2))
        >>> y = C.sequence.is_last(x)
        >>> # create one sequence of 4 tensors each with shape (3,2)
        >>> x0 = np.reshape(np.arange(24.0,dtype=np.float32),(1,4,3,2))
        >>> y.eval({x:x0})
        [array([ 0.,  0.,  0.,  1.], dtype=float32)]

    Args:
        seq: the symbolic tensor denoting a sequence
        name (str): the name of the node in the network

    Returns:
        :class:`~cntk.ops.functions.Function`
    '''
    # Alias the native op so it does not shadow this wrapper's own name.
    from cntk.cntk_py import is_last as native_is_last
    # Sanitize the operand with its own data type and hand it straight over.
    return native_is_last(sanitize_input(seq, get_data_type(seq)), name)
Exemplo n.º 32
0
def first(seq, name=''):
    '''
    Returns the first element of the symbolic input sequence ``seq``.

    Example:
        >>> x = C.sequence.input_variable(shape=(3,2))
        >>> y = C.sequence.first(x)
        >>> # create one sequence of 4 tensors each with shape (3,2)
        >>> x0 = np.reshape(np.arange(24.0,dtype=np.float32),(1,4,3,2))
        >>> y.eval({x:x0})
        array([[[ 0.,  1.],
                 [ 2.,  3.],
                 [ 4.,  5.]]], dtype=float32)

    Args:
        seq: the symbolic tensor denoting a sequence
        name (str): the name of the node in the network
    Returns:
        :class:`~cntk.ops.functions.Function`
    '''
    # Alias the native op so it does not shadow this wrapper's own name.
    from cntk.cntk_py import first as native_first
    # Sanitize the operand with its own data type and hand it straight over.
    return native_first(sanitize_input(seq, get_data_type(seq)), name)
Exemplo n.º 33
0
def reduce_sum(seq, name=''):
    '''
    Sums the elements of the input sequence along the sequence axis.

    Examples:
        >>> x = C.sequence.input(shape=(3,2))
        >>> # create one sequence of 4 tensors each with shape (3,2)
        >>> x0 = np.reshape(np.arange(24.0,dtype=np.float32),(1,4,3,2))
        >>> y = C.sequence.reduce_sum(x)
        >>> y.eval({x:x0})
        array([[[ 36.,  40.],
                 [ 44.,  48.],
                 [ 52.,  56.]]], dtype=float32)

    Args:
        seq: sequence input tensor
        name (`str`, optional): the name of the Function instance in the network

    Returns:
        :class:`~cntk.ops.functions.Function`
    '''
    from cntk.cntk_py import sequence_reduce_sum as native_sequence_reduce_sum
    # Sanitize the operand with its own data type and hand it straight over.
    return native_sequence_reduce_sum(sanitize_input(seq, get_data_type(seq)), name)
Exemplo n.º 34
0
def test_sanitize_input(data, dtype):
    '''
    Checks that ``sanitize_input`` keeps the values of ``data`` and yields an
    object whose ``dtype`` equals the requested ``dtype``.
    '''
    sanitized = sanitize_input(data, dtype)
    # Values must survive the conversion (up to floating point tolerance).
    assert np.allclose(sanitized.value, data)
    # The requested data type must be the one reported by the result.
    assert sanitized.dtype == dtype
Exemplo n.º 35
0
    def batch_norm(cntk_layer, inputs):
        '''
        Build the CNTK batch normalization op described by ``cntk_layer``.

        Only the first element of ``inputs`` is used as the operand. When
        ``cntk_layer.parameter_tensor`` is non-empty it is read positionally:
        index 0 is the saved mean, index 1 the saved variance and index 2 a
        global scale whose first element divides both statistics (a zero
        global scale zeroes them instead). Only when exactly five tensors are
        present are index 3 and index 4 used as the learned scale and bias.

        Args:
            cntk_layer (:class:`~cntk.contrib.crosstalkcaffe.unimodel.cntkmodel.CntkLayersDefinition`):
                the layer definition of batch normalization op
            inputs (list): a list contains all :class:`~cntk.ops.functions.Function` or
                :class:`~cntk.input`

        Return:
            :func:`~cntk.ops.functions.Function`: instantiated cntk batch normalization op

        Raises:
            AssertionError: if tensors are supplied but fewer than three
        '''
        operand = internal.sanitize_input(inputs[0])
        # Every parameter is a vector sized by the operand's leading dimension.
        channel_shape = (operand.shape[0], )

        def _as_float32(tensor):
            # Convert a tensor's raw data into a float32 numpy array.
            return np.asarray(tensor.data, dtype=np.float32)

        def _make_parameter(suffix, init):
            # Create a parameter named '<op_name>.<suffix>'.
            return ops.parameter(channel_shape,
                                 init=init,
                                 name='.'.join((cntk_layer.op_name, suffix)))

        # Initial values used when the layer carries no tensors.
        # NOTE(review): mean=1 / var=0 is an unusual default -- confirm intended.
        scale_init, bias_init, mean_init, var_init = 1, 0, 1, 0

        blobs = cntk_layer.parameter_tensor
        if blobs:
            if len(blobs) < 3:
                raise AssertionError(
                    'At least three tensors (saved_mean, saved_variance and scale) are needed'
                )
            global_scale = blobs[2].data[0]
            # Guard against division by zero: a zero scale nulls the statistics.
            factor = 1 / global_scale if global_scale != 0 else 0
            mean_init = _as_float32(blobs[0]) * factor
            var_init = _as_float32(blobs[1]) * factor
            if len(blobs) == 5:
                scale_init = _as_float32(blobs[3])
                bias_init = _as_float32(blobs[4])

        # Creation order is kept: scale, bias, mean, var.
        scale = _make_parameter('scale', scale_init)
        bias = _make_parameter('bias', bias_init)
        mean = _make_parameter('mean', mean_init)
        var = _make_parameter('var', var_init)

        # The positional True is presumably the ``spatial`` flag of
        # ops.batch_normalization -- TODO confirm against its signature.
        return ops.batch_normalization(operand, scale, bias, mean, var, True,
                                       use_cudnn_engine=False,
                                       epsilon=cntk_layer.parameters.epsilon,
                                       running_count=ops.constant(0),
                                       name=cntk_layer.op_name)
Exemplo n.º 36
0
def nce_loss(weights,
             biases,
             inputs,
             labels,
             noise_distribution,
             num_samples=32,
             allow_duplicates=True,
             seed=auto_select,
             name=''):
    '''nce_loss(weights, biases, inputs, labels, noise_distribution, num_samples=32, allow_duplicates=True, seed=auto_select, name='')
    Computes the noise contrastive estimation loss, mostly following Chris
    Dyer's notes [1]. At a high level, the layer draws `num_samples` random
    labels from `noise_distribution` and forms `num_samples`+1 binary
    classification problems in which the true label is treated as a positive
    example and the random labels as negative examples. The negatives are
    shared among all examples of the minibatch. Logits are computed only for
    the labels present in the minibatch and for the random labels drawn from
    `noise_distribution`. The gradients will be sparse if the labels are
    sparse.

    The `noise_distribution` is read once and certain quantities are
    precomputed from it, so the operation has to be reinstantiated whenever
    the `noise_distribution` changes.

    Shape inference for the weights is currently not supported when inputs are
    placeholders: either use a concrete input or provide the weights without
    any inferred dimensions.

    Example:
        >>> import scipy
        >>> # dimensions of input, number of noise labels, batch size, number of classes
        >>> xdim = 10
        >>> samples = 32
        >>> batch = 4
        >>> classes = 100
        >>> # some variables; typically x will be the output of a layer
        >>> x = C.input_variable(xdim)
        >>> y = C.input_variable(classes, is_sparse=True)
        >>> # dummy data
        >>> x0 = np.arange(batch * xdim, dtype=np.float32).reshape((batch, xdim))/(batch * xdim)
        >>> data = np.ones(batch, dtype=np.float32)
        >>> indices = list(range(10, 10*batch+1, 10))
        >>> indptr = list(range(batch + 1))
        >>> y0 = scipy.sparse.csr_matrix((data, indices, indptr), shape=(batch, classes))
        >>> # a dummy noise distribution
        >>> q = np.arange(classes, dtype=np.float32) + 1 # normalization not necessary
        >>> # the parameters
        >>> b = C.parameter((classes, 1), init=-np.log(classes))
        >>> W = C.parameter((classes, C.InferredDimension), init=C.glorot_uniform(seed=98052))
        >>> # the loss
        >>> loss = C.nce_loss(W, b, x, y, q, seed=98052)
        >>> # evaluate the loss at our dummy data
        >>> np.round(loss.eval({x:x0, y:y0}), decimals=3)
        array([ 2.385,  3.035,  3.886,  3.868], dtype=float32)
        >>> # after training, use the logits for predictions
        >>> logits = C.times(W, C.reshape(x, (xdim, 1))) + b

    Args:
        weights: parameter (or variable in general) containing the weights with
         which inputs will be multiplied. Its shape must be
         (number of classes, dimension of input)
        biases: parameter (or variable in general) containing the biases that
         will be added to the product of weights and inputs. Its shape must be
         (number of classes, 1)
        inputs: vector of inputs to this layer. Multiplying by the weights and
         adding the biases gives the logits.
        labels: a one-hot vector with the ground-truth labels.
        noise_distribution: a constant vector with dimension equal to the number
         of classes. The entries must be positive numbers but do not have to
         sum to 1. Random labels will be drawn according to the normalized
         distribution.
        num_samples: number of random labels that will be drawn from the
         `noise_distribution`.
        allow_duplicates: boolean. If True (default), the random labels can
         contain duplicates. Compared to `allow_duplicates=False` it is faster
         but the quality of the approximations is slightly worse for the same
         number of samples.
        seed: random seed. The default value selects a unique random seed.
        name (str, optional): the name of the Function instance in the network
    Returns:
        :class:`~cntk.ops.functions.Function`

    See also:
        [1] C. Dyer. `Notes on Noise Contrastive Estimation and Negative Sampling [pdf] <http://demo.clab.cs.cmu.edu/cdyer/nce_notes.pdf>`_.
    '''
    # Alias the native op so it does not shadow this wrapper's own name.
    from cntk.cntk_py import nce_loss as native_nce_loss
    # Only inputs, labels and noise_distribution are sanitized, using one
    # shared data type; weights and biases are forwarded exactly as given.
    common_dtype = get_data_type(inputs, labels, noise_distribution)
    inputs, labels, noise_distribution = [
        sanitize_input(operand, common_dtype)
        for operand in (inputs, labels, noise_distribution)]
    return native_nce_loss(weights, biases, inputs, labels, noise_distribution,
                           num_samples, allow_duplicates, seed, name)
Exemplo n.º 37
0
def past_value(x, initial_state=None, time_step=1, name=''):
    '''
    Returns the past value w.r.t. ``x``. It is most often used when building
    RNNs. The result has the same shape as the input but holds the previous
    logical sample. ``time_step`` is the number of steps to look into the
    past and defaults to 1. Where no past value exists (i.e. the current
    sample is the first one in the tensor) the ``initial_state`` value is
    returned instead.

    The initial state can be a constant (scalar or tensor), a learnable tensor
    or input data (which has a batch dimension, as needed for sequence-to-sequence models).

    Example:
        >>> # create example input: one sequence with 4 tensors of shape (3, 2)
        >>> from cntk.layers.typing import Tensor, Sequence
        >>> x = C.sequence.input((3,2))
        >>> x0 = np.reshape(np.arange(24,dtype=np.float32),(1,4,3,2))
        >>> x0
        array([[[[  0.,   1.],
                 [  2.,   3.],
                 [  4.,   5.]],
        <BLANKLINE>
                [[  6.,   7.],
                 [  8.,   9.],
                 [ 10.,  11.]],
        <BLANKLINE>
                [[ 12.,  13.],
                 [ 14.,  15.],
                 [ 16.,  17.]],
        <BLANKLINE>
                [[ 18.,  19.],
                 [ 20.,  21.],
                 [ 22.,  23.]]]], dtype=float32)

        >>> # this demonstrates how past_value shifts the sequence by one, padding with initial_state
        >>> y = C.sequence.past_value(x) # initial_state is 0 by default
        >>> y.eval({x:x0})
        [array([[[  0.,   0.],
                 [  0.,   0.],
                 [  0.,   0.]],
        <BLANKLINE>
                [[  0.,   1.],
                 [  2.,   3.],
                 [  4.,   5.]],
        <BLANKLINE>
                [[  6.,   7.],
                 [  8.,   9.],
                 [ 10.,  11.]],
        <BLANKLINE>
                [[ 12.,  13.],
                 [ 14.,  15.],
                 [ 16.,  17.]]], dtype=float32)]

        >>> # here, we pass the initial_state as input data (e.g. sequence-to-sequence)
        >>> s = C.input((3,2))  # not a sequence, e.g. a final encoder hidden state
        >>> s0 = np.reshape(np.arange(6,dtype=np.float32)/2,(1,3,2))
        >>> s0
        array([[[ 0. ,  0.5],
                [ 1. ,  1.5],
                [ 2. ,  2.5]]], dtype=float32)
        >>> y = C.sequence.past_value(x, initial_state=s)
        >>> y.eval({x:x0, s:s0}) # same as the previous example except for the first time step
        [array([[[  0. ,   0.5],
                 [  1. ,   1.5],
                 [  2. ,   2.5]],
        <BLANKLINE>
                [[  0. ,   1. ],
                 [  2. ,   3. ],
                 [  4. ,   5. ]],
        <BLANKLINE>
                [[  6. ,   7. ],
                 [  8. ,   9. ],
                 [ 10. ,  11. ]],
        <BLANKLINE>
                [[ 12. ,  13. ],
                 [ 14. ,  15. ],
                 [ 16. ,  17. ]]], dtype=float32)]

    Args:
        x: the tensor (or its name) from which the past value is obtained
        initial_state: tensor or scalar representing the initial value to be used when the input tensor is shifted in time.
         When omitted, a float32 scalar constant 0.0 is used.
        time_step (int): the number of time steps to look into the past (default 1)
        name (str, optional): the name of the Function instance in the network

    Returns:
        :class:`~cntk.ops.functions.Function`
    '''

    from cntk.internal import sanitize_dtype_cntk
    # Alias the native op so it does not shadow this wrapper's own name.
    from cntk.cntk_py import Constant, past_value as native_past_value

    if initial_state is not None:
        initial_state = sanitize_input(initial_state)
    else:
        # No state supplied: fall back to a float32 scalar constant of 0.0.
        initial_state = Constant.scalar(sanitize_dtype_cntk(np.float32), 0.0)

    # The initial state is resolved before the operand itself is sanitized.
    return native_past_value(sanitize_input(x), initial_state, time_step, name)