Example #1
def HighwayBlock(
        dim,  # ideally this should be inferred, but times does not allow an inferred x inferred parameter for now
        transform_weight_initializer=0,
        transform_bias_initializer=0,
        update_weight_initializer=0,
        update_bias_initializer=0,
        name=''):
    WT = C.Parameter((dim, dim), init=transform_weight_initializer, name=name + '_WT')
    bT = C.Parameter(dim, init=transform_bias_initializer, name=name + '_bT')
    WU = C.Parameter((dim, dim), init=update_weight_initializer, name=name + '_WU')
    bU = C.Parameter(dim, init=update_bias_initializer, name=name + '_bU')

    @C.Function
    def func(x_var):
        x = C.placeholder()
        transform_gate = C.sigmoid(C.times(x, WT, name=name + '_T') + bT)
        update = C.relu(C.times(x, WU, name=name + '_U') + bU)
        return C.as_block(
            x + transform_gate * (update - x),  # trans(x)*u(x)+(1-f(x))*x
            [(x, x_var)],
            'HighwayBlock',
            'HighwayBlock' + name)

    return func
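A minimal usage sketch (not part of the original snippet): stacking two highway blocks over a 300-dimensional feature vector. The dimension and the glorot initializers are assumptions made for illustration.

import cntk as C

features = C.input_variable(300)
hw1 = HighwayBlock(300, transform_weight_initializer=C.glorot_uniform(),
                   update_weight_initializer=C.glorot_uniform(), name='hw1')
hw2 = HighwayBlock(300, transform_weight_initializer=C.glorot_uniform(),
                   update_weight_initializer=C.glorot_uniform(), name='hw2')
h = hw2(hw1(features))  # output shape (300,), same as the input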
Example #2
def test_BatchNormalization(tmpdir):
    dtype = np.float32

    sample = [  # 5 samples having 4 classes
        [1, 1, 2, 3],
        [0, 0, 0, 0],
        [3, 3, 4, 4],
        [1000, 1000, 1000, 1000],
        [10000, 10000, 10000, 10000]]

    epsilon = 0.00001

    t = np.asarray(sample, dtype=dtype).reshape(-1,1)
    mean = 1
    var = 2
    init_scale = 3
    init_bias = 4

    scale        = C.Parameter(init=np.asarray([init_scale], dtype=dtype), dtype=dtype)
    bias         = C.Parameter(init=np.asarray([init_bias], dtype=dtype), dtype=dtype)
    run_mean     = C.ops.constant(mean, shape=(1), dtype=dtype)
    run_variance = C.ops.constant(var,  shape=(1), dtype=dtype)
    run_count    = C.ops.constant(0,               dtype=dtype)

    a = C.input_variable(shape=(1), dtype=dtype, needs_gradient=False, name='a')

    op_node = C.batch_normalization(a, scale, bias, run_mean, run_variance, running_count=run_count, spatial=False,
        epsilon=epsilon)

    verify_one_input(op_node, t, tmpdir, 'BatchNormalization')
Example #3
    def create_model(self):
        w = cntk.Parameter((self.number_features, self.number_labels),
                           init=cntk.glorot_uniform(),
                           name='W')
        b = cntk.Parameter((self.number_labels, ), init=0, name='b')

        self.model = cntk.times(self.input_transform, w) + b
Example #4
def cross_entropy_with_sampled_softmax(
    hidden_vector,  # Node providing the output of the recurrent layers
    target_vector,  # Node providing the expected labels (as sparse vectors)
    vocab_dim,  # Vocabulary size
    hidden_dim,  # Dimension of the hidden vector
    num_samples,  # Number of samples to use for sampled softmax
    sampling_weights,  # Node providing weights to be used for the weighted sampling
    allow_duplicates=False,  # Boolean flag to control whether to use sampling with replacement (allow_duplicates == True) or without replacement.
    use_sparse=True  # whether to keep the sample selector sparse; a dense selector is only meant for debugging
):
    bias = C.Parameter(shape=(vocab_dim, 1), init=0)
    weights = C.Parameter(shape=(vocab_dim, hidden_dim),
                          init=C.initializer.glorot_uniform())

    sample_selector_sparse = C.random_sample(
        sampling_weights, num_samples,
        allow_duplicates)  # sparse matrix [num_samples * vocab_size]
    if use_sparse:
        sample_selector = sample_selector_sparse
    else:
        # Note: Sampled softmax with dense data is only supported for debugging purposes.
        # It might easily run into memory issues as the matrix 'I' below might be quite large.
        # In case we want a dense representation for all data, we have to convert the sample selector.
        I = C.Constant(np.eye(vocab_dim, dtype=np.float32))
        sample_selector = C.times(sample_selector_sparse, I)

    inclusion_probs = C.random_sample_inclusion_frequency(
        sampling_weights, num_samples,
        allow_duplicates)  # dense row [1 * vocab_size]
    log_prior = C.log(inclusion_probs)  # dense row [1 * vocab_dim]

    print("hidden_vector: " + str(hidden_vector.shape))
    wS = C.times(sample_selector, weights,
                 name='wS')  # [num_samples * hidden_dim]
    print("ws:" + str(wS.shape))
    zS = C.times_transpose(wS, hidden_vector, name='zS1') + C.times(
        sample_selector, bias, name='zS2') - C.times_transpose(
            sample_selector, log_prior, name='zS3')  # [num_samples]

    # Getting the weight vector for the true label. Dimension hidden_dim
    wT = C.times(target_vector, weights, name='wT')  # [1 * hidden_dim]
    zT = C.times_transpose(wT, hidden_vector, name='zT1') + C.times(
        target_vector, bias, name='zT2') - C.times_transpose(
            target_vector, log_prior, name='zT3')  # [1]

    zSReduced = C.reduce_log_sum_exp(zS)

    # Compute the cross entropy that is used for training.
    # We don't check whether any of the classes in the random samples coincides with the true label, so it might happen that the true class is counted
    # twice in the normalizing denominator of sampled softmax.
    cross_entropy_on_samples = C.log_add_exp(zT, zSReduced) - zT

    # For applying the model we also output a node providing the input for the full softmax
    z = C.times_transpose(weights, hidden_vector) + bias
    z = C.reshape(z, shape=(vocab_dim))

    zSMax = C.reduce_max(zS)
    error_on_samples = C.less(zT, zSMax)
    return (z, cross_entropy_on_samples, error_on_samples)
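A rough usage sketch for the function above; the vocabulary size, hidden dimension, sample count, and uniform sampling weights are all assumptions made for illustration.

vocab_dim, hidden_dim, num_samples = 10000, 300, 32
hidden_vector = C.sequence.input_variable(hidden_dim)
target_vector = C.sequence.input_variable(vocab_dim, is_sparse=True)
# uniform sampling weights, shape [1 x vocab_dim]
sampling_weights = C.Constant(np.full((1, vocab_dim), 1.0, dtype=np.float32))
z, ce, err = cross_entropy_with_sampled_softmax(
    hidden_vector, target_vector, vocab_dim, hidden_dim, num_samples, sampling_weights)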
Example #5
 def func(x_var):
     x  = C.placeholder()
     WT = C.Parameter((dim,dim,), init=transform_weight_initializer, name=name+'_WT')
     bT = C.Parameter(dim,        init=transform_bias_initializer,   name=name+'_bT')
     WU = C.Parameter((dim,dim,), init=update_weight_initializer,    name=name+'_WU')
     bU = C.Parameter(dim,        init=update_bias_initializer,      name=name+'_bU')
     transform_gate = C.sigmoid(C.times(x, WT, name=name+'_T') + bT)
     update = C.relu(C.times(x, WU, name=name+'_U') + bU)
     return C.as_block(
         x + transform_gate * (update - x),
         [(x, x_var)],
         'HighwayBlock',
         'HighwayBlock'+name)
Example #6
def BiRecurrence(step_function: C.Function, initial_state=0, dropout_rate_input=None, dropout_rate_output=None,
                 weight_tie: bool = False, seed=SentinelValueForAutoSelectRandomSeed, name=''):
    """ Wrapper to create a bidirectional rnn

    Also comes with the option to halve the number of parameters required by a bidirectional recurrent layer.
    This is done by only using one recurrent unit to do both forward and backward computation instead of
    the usual two. A forward and backward token is used to initialise the hidden state so that the recurrent
    unit can tell the directionality.

    More details can be found in the paper 'Efficient Bidirectional Neural Machine Translation' (https://arxiv.org/abs/1908.09329)

    Example:
        a = C.sequence.input_variable(10)
        b = BiRecurrence(LSTM(100), weight_tie=True)(a)

        assert b.shape == (200, )

    Arguments:
        step_function (:class:`~cntk.ops.functions.Function` or equivalent Python function):
            This function must have N+1 inputs and N outputs, where N is the number of state variables
            (typically 1 for GRU and plain RNNs, and 2 for LSTMs).
        initial_state: the initial value of the recurrent state (defaults to 0)
        dropout_rate_input: variational dropout applied to the input
        dropout_rate_output: variational dropout applied to the output
        weight_tie (bool): whether to use only one recurrent function for computation in both directions.
        seed (int): seed for randomisation
        name (str, optional): the name of the Function instance in the network

    Returns:
        :class:`~cntk.ops.functions.Function`:
        A function that accepts one argument (which must be a sequence) and performs the recurrent operation on it
    """
    fxn1 = step_function
    fxn2 = step_function.clone(C.CloneMethod.clone, {}) if not weight_tie else fxn1

    forward_token = initial_state
    backward_token = initial_state
    if weight_tie:
        forward_token = C.Parameter(shape=(-1,), init=C.glorot_normal(), name='f_token')
        backward_token = C.Parameter(shape=(-1,), init=C.glorot_normal(), name='b_token')

    forward = Recurrence(fxn1, dropout_rate_input=dropout_rate_input, dropout_rate_output=dropout_rate_output, initial_state=forward_token, seed=seed)
    backward = Recurrence(fxn2, dropout_rate_input=dropout_rate_input, dropout_rate_output=dropout_rate_output, initial_state=backward_token, seed=seed, go_backwards=True)

    @C.Function
    def inner(x):
        output = C.splice(forward(x), backward(x), axis=-1)
        return C.layers.Label(name)(output) if name else output

    return inner
Example #7
def test_initializer_scale():
    # this should work fine:
    p = C.Parameter(shape=(1,), init=initializer.uniform(1))

    with pytest.raises(ValueError) as excinfo:
        name = 'uniform_zero'
        p = C.Parameter(shape=(1,), init=initializer.uniform(0), name=name)
    assert 'CreateInitializer' in str(excinfo.value)
    assert name in str(excinfo.value)

    with pytest.raises(ValueError) as excinfo:
        name = 'glorot_negative_one'
        p = C.Parameter(shape=(1,), init=initializer.glorot_uniform(-1), name=name)
    assert 'CreateInitializer' in str(excinfo.value)
    assert name in str(excinfo.value)
Example #8
def test_large_model_serialization_double(tmpdir):
    import os

    two_gb = 2**31
    type_size = np.dtype(np.float64).itemsize
    size = two_gb // type_size + 10

    assert size * type_size > two_gb

    device = C.device.cpu()
    i = C.sequence.input(size, dtype=np.float64)
    w = C.Parameter((size, ),
                    dtype=np.float64,
                    init=C.uniform(3.0, seed=12345),
                    device=device)
    z = C.times(i, w)

    filename = str(tmpdir / 'test_large_model_serialization_double.out')
    z.save(filename)

    assert os.path.getsize(filename) > two_gb

    y = C.Function.load(filename, device=device)

    assert (len(z.parameters) == len(y.parameters))

    for param_pair in zip(z.parameters, y.parameters):
        assert param_pair[0].shape == param_pair[1].shape
        assert np.allclose(param_pair[0].value, param_pair[1].value)
Example #9
    def build(self):
        self.input_kernel = C.Parameter(shape=(self._input_size,
                                               self._hidden_dim),
                                        init=self._input_initializer)
        self.recur_kernel = C.Parameter(shape=(self._hidden_dim, ),
                                        init=self._recurrent_initializer)
        self.bias = C.Parameter(shape=(self._hidden_dim), init=0)

        @C.Function
        def runit(h, x):
            ht = self._activation(
                C.times(x, self.input_kernel) + h * self.recur_kernel +
                self.bias)
            return ht

        return runit
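The step function returned by build() maps (previous state, input) to the next state, so it plugs directly into a Recurrence layer. A hedged sketch, assuming rnn is an instance of the surrounding class with _input_size=50 and _hidden_dim=128:

x_seq = C.sequence.input_variable(50)
step = rnn.build()                          # runit: (h, x) -> h_t
h_seq = C.layers.Recurrence(step)(x_seq)    # hidden state of dim 128 at every step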
Example #10
    def simi_attention(self, input, memory):
        '''
        Returns:
            memory-weighted vectors over input [#,c][d]
            attention weights over memory [#,c][*=q]
        '''
        input_ph = C.placeholder()  # [#,c][d]
        mem_ph = C.placeholder()  # [#,q][d]

        input_dense = Dense(2 * self.hidden_dim, bias=False, input_rank=1)
        mem_dense = Dense(2 * self.hidden_dim, bias=False, input_rank=1)
        bias = C.Parameter(shape=(2 * self.hidden_dim, ), init=0.0)
        weight_dense = Dense(1, bias=False, input_rank=1)

        proj_inp = input_dense(input_ph)  # [#,c][d]
        proj_mem = mem_dense(mem_ph)  # [#,q][d]
        unpack_memory, mem_mask = C.sequence.unpack(
            proj_mem, 0).outputs  # [#][*=q, d] [#][*=q]
        expand_mem = C.sequence.broadcast_as(unpack_memory,
                                             proj_inp)  # [#,c][*=q,d]
        expand_mask = C.sequence.broadcast_as(mem_mask, proj_inp)  # [#,c][*=q]
        matrix = C.reshape(weight_dense(C.tanh(proj_inp + expand_mem + bias)),
                           (-1, ))  # [#,c][*=q]
        matrix = C.element_select(expand_mask, matrix, -1e30)
        logits = C.softmax(matrix, axis=0)  # [#,c][*=q]
        weight_mem = C.reduce_sum(C.reshape(logits, (-1, 1)) * expand_mem,
                                  axis=0)  # [#,c][d]
        weight_mem = C.reshape(weight_mem, (-1, ))

        return C.as_block(C.combine(weight_mem, logits), [(input_ph, input),
                                                          (mem_ph, memory)],
                          'simi_attention', 'simi_attention')
Example #11
def convolution(input, name, **kwargs):
    dim = __weights_dict[name]['weights'].ndim

    weight = np.transpose(__weights_dict[name]['weights'],
                          [dim - 1, dim - 2] + list(range(0, dim - 2)))
    w = cntk.Parameter(init=weight, name=name + '_weight')

    input = cntk.transpose(input, [dim - 2] + list(range(0, dim - 2)))

    layer = ops.convolution(w, input, **kwargs)
    if 'bias' in __weights_dict[name]:
        bias = np.reshape(__weights_dict[name]['bias'], [-1] + [1] * (dim - 2))
        b = cntk.Parameter(init=bias, name=name + '_bias')
        layer = layer + b
    layer = cntk.transpose(layer, list(range(1, dim - 1)) + [0])
    return layer
Example #12
def cross_entropy_with_full_softmax(
        hidden_vector,  # Node providing the output of the recurrent layers
        target_vector,  # Node providing the expected labels (as sparse vectors)
        vocab_dim,  # Vocabulary size
        hidden_dim  # Dimension of the hidden vector
):
    bias = C.Parameter(shape=(vocab_dim, 1), init=0)
    weights = C.Parameter(shape=(vocab_dim, hidden_dim),
                          init=C.initializer.glorot_uniform())

    z = C.reshape(
        C.times_transpose(weights, hidden_vector) + bias, (1, vocab_dim))
    zT = C.times_transpose(z, target_vector)
    ce = C.reduce_log_sum_exp(z) - zT
    zMax = C.reduce_max(z)
    error_on_samples = C.less(zT, zMax)
    return (z, ce, error_on_samples)
Example #13
 def build(self, require_train=False):
     gamma = C.Parameter(1,init=1)
     scales = C.Parameter(3, init=C.glorot_uniform(), name='scales')
     encoder = self.encoder_fac.build()
     bilm = self.bilm_fac.build()
     @C.Function
     def _func(x):
         ph = C.placeholder()
         first_out = encoder(ph)
         second_out, third_out = bilm(first_out).outputs # [#,*][1024]
         dup_first_out = C.splice(first_out, first_out) #[#,*][1024]
         s = C.softmax(scales)
         out = gamma*(s[0]*dup_first_out+s[1]*second_out+s[2]*third_out)
         return C.as_block(
             out, [(ph, x)],'Elmo', 'Elmo'
         )
     return _func
Example #14
def test_custom_op_with_int8_params(tmpdir):
    model_file = str(tmpdir/'test_model_params.bin')
    delete_if_file_exists(model_file)

    W1 = C.Parameter((1, 42), dtype=np.int8)
    W1.value = np.arange(42).reshape(1, 42)
    W2 = C.Parameter((1, 42), dtype=np.int8)
    W3 = C.Parameter((1, 42), dtype=np.float64)
    X = C.input_variable((1, 42), dtype=np.float64)

    # custom_op, output_shape, output_data_type, *operands, **kw_name
    z = C.custom_proxy_op("times", (21, 2), np.int8, X, W1, W2, W3, name ="custom_proxy")
    z.save(model_file)

    newz = C.load_model(model_file)
    assert(newz.parameters[0].shape == (1, 42))
    assert(newz.output.shape == (21, 2))
    assert (np.array_equal(W1.value, newz.parameters[0].value))
Example #15
def resblock_basic(inp, num_filters):
    c1 = C.layers.Convolution(
        (3, 3), num_filters, init=C.he_normal(), pad=True, bias=False)(inp)
    c1 = C.layers.BatchNormalization(map_rank=1)(c1)
    c1 = C.param_relu(C.Parameter(c1.shape, init=C.he_normal()), c1)

    c2 = C.layers.Convolution(
        (3, 3), num_filters, init=C.he_normal(), pad=True, bias=False)(c1)
    c2 = C.layers.BatchNormalization(map_rank=1)(c2)
    return inp + c2
Example #16
def SRResNet(h0):
    print('Generator inp shape: ', h0.shape)
    with C.layers.default_options(init=C.he_normal(), bias=False):

        h1 = C.layers.Convolution((9, 9), 64, pad=True)(h0)
        h1 = C.param_relu(C.Parameter(h1.shape, init=C.he_normal()), h1)

        h2 = resblock_basic_stack(h1, 16, 64)

        h3 = C.layers.Convolution((3, 3), 64, activation=None, pad=True)(h2)
        h3 = C.layers.BatchNormalization(map_rank=1)(h3)

        h4 = h1 + h3
        # upsample with a strided transposed convolution

        h5 = C.layers.ConvolutionTranspose2D(
            (3, 3), 64, pad=True, strides=(2, 2), output_shape=(224, 224))(h4)
        h5 = C.param_relu(C.Parameter(h5.shape, init=C.he_normal()), h5)

        h6 = C.layers.Convolution((3, 3), 3, pad=True)(h5)

        return h6
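SRResNet above calls resblock_basic_stack, which is not among these snippets. A minimal sketch, assuming it simply chains num_blocks copies of resblock_basic from the previous example:

def resblock_basic_stack(inp, num_blocks, num_filters):
    out = inp
    for _ in range(num_blocks):
        out = resblock_basic(out, num_filters)
    return out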
Example #17
 def func(x_var):
     x = C.placeholder()
     WT = C.Parameter((dim, dim), init=transform_weight_initializer, name=name + '_WT')
     bT = C.Parameter(dim, init=transform_bias_initializer, name=name + '_bT')
     WU = C.Parameter((dim, dim), init=update_weight_initializer, name=name + '_WU')
     bU = C.Parameter(dim, init=update_bias_initializer, name=name + '_bU')
     transform_gate = C.sigmoid(C.times(x, WT, name=name + '_T') + bT)
     update = C.tanh(C.times(x, WU, name=name + '_U') + bU)
     return C.as_block(update * transform_gate + (1 - transform_gate) * x,
                       [(x, x_var)], 'SingleInner', 'SingleInner' + name)
Example #18
def InstanceNormalization(
        num_channel, initial_scale=1, initial_bias=0, epsilon=C.default_override_or(0.00001), name=''):
    """ Instance Normalization (2016) """
    epsilon = C.get_default_override(InstanceNormalization, epsilon=epsilon)

    dtype = C.get_default_override(None, dtype=C.default_override_or(np.float32))

    scale = C.Parameter(num_channel, init=initial_scale, name='scale')
    bias = C.Parameter(num_channel, init=initial_bias, name='bias')
    epsilon = np.asarray(epsilon, dtype=dtype)

    @C.BlockFunction('InstanceNormalization', name)
    def instance_normalization(x):
        mean = C.reduce_mean(x, axis=(1, 2))
        x0 = x - mean
        std = C.sqrt(C.reduce_mean(x0 * x0, axis=(1, 2)))
        if epsilon != 0:
            std += epsilon
        x_hat = x0 / std
        return x_hat * C.reshape(scale, (-1, 1, 1)) + C.reshape(bias, (-1, 1, 1))

    return instance_normalization
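A small usage sketch (the 8x32x32 shape is an assumption): the block averages over the spatial axes (1, 2), so it expects channels-first CHW input.

x = C.input_variable((8, 32, 32))
inorm = InstanceNormalization(8)
y = inorm(x)  # same shape; each channel normalized, then scaled by 'scale' and shifted by 'bias'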
Example #19
    def build(self):
        input_kernel = C.Parameter(shape=(self._input_size, self._hidden_dim),
                                   init=self._input_initializer)
        recur_kernel = C.Parameter(shape=(self._hidden_dim, ),
                                   init=self._recurrent_initializer)
        bias = C.Parameter(shape=(self._hidden_dim), init=0)
        if self._recurrent_min_abs > 0:
            abs_kernel = C.abs(recur_kernel)
            min_abs_kernel = C.element_max(abs_kernel, self._recurrent_min_abs)
            recur_kernel = min_abs_kernel * C.element_select(
                C.greater_equal(recur_kernel, C.constant(0)), C.constant(1),
                C.constant(-1))
        if self._recurrent_max_abs:
            recur_kernel = C.clip(recur_kernel, -self._recurrent_max_abs,
                                  self._recurrent_max_abs)

        @C.Function
        def runit(h, x):
            h_t = C.times(x, input_kernel) + bias + recur_kernel * h
            return h_t

        return runit
Example #20
    def MyBLSTMLayer(hidden_size=128, num_layers=2):
        W = C.Parameter((C.InferredDimension, hidden_size),
                        init=C.he_normal(1.0),
                        name='rnn_parameters')

        def _func(operand):
            return C.optimized_rnnstack(operand,
                                        weights=W,
                                        hidden_size=hidden_size,
                                        num_layers=num_layers,
                                        bidirectional=True,
                                        recurrent_op='lstm')

        return _func
Example #21
def test_parameter_set_value():
    p = C.Parameter(shape=(2, 3), init=1)
    n = np.random.randn(2, 3)
    p.value = n
    assert np.all(p.value == n.astype(p.dtype))

    n = np.reshape(np.arange(6), (2, 3))
    p.value = n
    op = C.plus(p, p)
    state, output = op.forward({}, op.outputs, op.outputs)
    value = output[op.output]
    assert np.all(value == 2 * n.astype(p.dtype))

    p.value = C.internal.sanitize_value(p.shape, 1.0, np.float32, None)
    assert np.all(p.value == np.ones((2, 3)))
Example #22
def test_saving_and_loading_int8_ndarray_as_attribute(tmpdir):
    model_file = str(tmpdir/'test_model.bin')
    delete_if_file_exists(model_file)

    data = np.arange(0,64, dtype=np.int8).reshape(16,4)
    dict_val = C._to_cntk_dict_value(data)

    W = C.Parameter((C.InferredDimension, 42), init=C.glorot_uniform(), dtype=np.float64)
    x = C.input_variable(12, dtype=np.float64)
    y = C.times(x, W)
    y.custom_attributes = {'int8_nd':dict_val}
    y.save(model_file)

    assert(os.path.isfile(model_file))

    z = C.load_model(model_file)
    int8_data = z.custom_attributes['int8_nd']
    assert(int8_data.shape == (16,4))

    assert (np.array_equal(int8_data, data))
Example #23
def test_recurrance_with_udf_without_layers():
    name = "SimpleUdf"

    def udf(a):
        return C.user_function(SimpleUdf(a, name=name))

    # input variable and the data.
    x = C.sequence.input_variable(needs_gradient=True, shape=(2, ))
    x0 = np.reshape(np.arange(16.0, dtype=np.float32), (2, 4, 2))
    print(x0)

    # creates a recurrent loop.
    p = C.placeholder(shape=(2, ))
    past = C.sequence.past_value(p)
    z = udf(x) * udf(past) + C.Parameter((2, ), init=[1, 1])
    z.replace_placeholders({p: z.outputs[0]})

    #C.logging.graph.plot(z, "recurrent.pdf")
    out = z.eval({x: x0})
    print(out)
    expected_out = [
        np.array([1, 1, 3, 4, 13, 21, 79, 148],
                 dtype=np.float32).reshape(4, 2),
        np.array([1, 1, 11, 12, 133, 157, 1863, 2356],
                 dtype=np.float32).reshape(4, 2)
    ]
    assert np.array_equal(out, expected_out)

    gradient, result = z.grad({x: x0}, wrt=[x], outputs=[z.output])
    print(result)
    assert np.array_equal(result, expected_out)

    expected_grad = [
        np.array([0, 0, 29, 41, 21, 32, 13, 21],
                 dtype=np.float32).reshape(4, 2),
        np.array([0, 0, 181, 209, 165, 192, 133, 157],
                 dtype=np.float32).reshape(4, 2)
    ]
    print(gradient)
    assert np.array_equal(gradient, expected_grad)
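The SimpleUdf class wrapped above is defined elsewhere in the test module. The expected values are consistent with an identity user-defined function; a minimal sketch of such a class (an assumption, not the original implementation):

class SimpleUdf(C.ops.functions.UserFunction):
    def __init__(self, arg, name='SimpleUdf'):
        super(SimpleUdf, self).__init__([arg], name=name)

    def infer_outputs(self):
        i = self.inputs[0]
        return [C.output_variable(i.shape, i.dtype, i.dynamic_axes)]

    def forward(self, argument, device=None, outputs_to_retain=None):
        return None, argument        # identity: pass the input through unchanged

    def backward(self, state, root_gradients):
        return root_gradients        # gradient of the identity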
Example #24
def binary_convolution(filter_shape,
                      num_filters=1,
                      channels = 1,
                      init=C.glorot_uniform(),
                      pad=False,
                      strides=1,                      
                      name='BinaryConvolution'):  
    '''
    Creates a binary convolution function based on the input parameters.

    Args:
        filter_shape : shape of the filter
        num_filters  : number of filters to use
        channels     : number of input channels
        init         : initialization function for the filter
        pad          : padding enabled or not for the filter
        strides      : overlap for this filter
        name         : name given to the binary convolution.

    Returns:
        a function for performing binary convolution
    '''

    kernel_shape = (num_filters, channels) + filter_shape
    W = C.Parameter(shape=kernel_shape, init=init, name="filter")
    
    
    def convolution(operand):
        
        bcv_operand_p = C.placeholder(
            operand.shape, operand.dynamic_axes, name="operand")
        
        bcv = C.convolution(
                    CustomMultibit(W, 1), 
                    CustomMultibit(bcv_operand_p, 1), 
                    auto_padding=[False, pad, pad], 
                    strides=[strides])

        return  C.as_block(bcv, [(bcv_operand_p, operand)], name)
                  
    return convolution
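A usage sketch for the factory above, applying it to a single-channel 28x28 input (the shape is an assumption); evaluating it requires the native CustomMultibit operator referenced in the code to be loaded.

x = C.input_variable((1, 28, 28))
conv = binary_convolution((3, 3), num_filters=32, channels=1, pad=True)
y = conv(x)  # wraps the binarized convolution in a block named 'BinaryConvolution'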
Example #25
def test_large_model_serialization_float(tmpdir):
    import os
    from cntk.layers import Recurrence, LSTM, Dense

    type_size = np.dtype(np.float32).itemsize
    two_gb = 2**31
    size = (2097152 + 4, 256, 512, 4096)
    assert size[0] * size[1] * type_size > two_gb

    device = C.device.cpu()
    i = C.sequence.input(size[0])
    w = C.Parameter((size[0], size[1]),
                    init=C.uniform(3.0, seed=12345),
                    device=device)
    e = C.times(i, w)

    h_fwd = Recurrence(LSTM(size[2]))(e)
    h_bwd = Recurrence(LSTM(size[2]), go_backwards=True)(e)
    h_last_fwd = C.sequence.last(h_fwd)
    h_first_bwd = C.sequence.first(h_bwd)
    t = C.splice(h_last_fwd, h_first_bwd)

    z1 = Dense(size[2], activation=C.relu)(t)
    z = Dense(2, activation=None)(z1)

    filename = str(tmpdir / 'test_large_model_serialization_float.out')
    z.save(filename)

    assert os.path.getsize(filename) > two_gb

    y = C.Function.load(filename, device=device)

    assert (len(z.parameters) == len(y.parameters))

    for param_pair in zip(z.parameters, y.parameters):
        assert param_pair[0].shape == param_pair[1].shape
        assert np.allclose(param_pair[0].value, param_pair[1].value)
Example #26
import cntk as C
import numpy as np

dataset_size = 200000
X = np.random.rand(dataset_size, 2)
labels = np.zeros((dataset_size, 3))
labels[X[:, 0] > X[:, 1]] = [0, 0, 1]
labels[X[:, 0] <= X[:, 1]] = [1, 0, 0]
labels[X[:, 1] + X[:, 0] > 1] = [0, 1, 0]

init = C.initializer.normal(0.01)

theta1 = C.Parameter(shape=(2, 12), init=init)
bias1 = C.Parameter(shape=(1, 12), init=init)

theta2 = C.Parameter(shape=(12, 3), init=init)
bias2 = C.Parameter(shape=(1, 3), init=init)

x = C.input_variable(shape=(2, ), needs_gradient=False)
t = C.input_variable(shape=(3, ), needs_gradient=False)


def forward(x):
    y = C.times(x, theta1) + C.squeeze(bias1, 0)
    y = C.element_max(y, 0.)
    return C.times(y, theta2) + C.squeeze(bias2, 0)

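The snippet ends with the forward pass; a hedged sketch of how a loss and parameter gradients could be obtained from this graph (the minibatch size of 32 is an assumption):

z = forward(x)
loss = C.cross_entropy_with_softmax(z, t)
batch = {x: X[:32].astype(np.float32), t: labels[:32].astype(np.float32)}
grads = loss.grad(batch, wrt=[theta1, bias1, theta2, bias2])  # map: parameter -> gradient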
Example #27
    # Our model expects float32 features, and cross-entropy expects one-hot encoded labels.
    Y = scipy.sparse.csr_matrix((np.ones(N, np.float32), (range(N), Y)),
                                shape=(N, num_classes))
    X = X.astype(np.float32)
    return X, Y


X_train, Y_train = generate_synthetic_data(20000)
X_test, Y_test = generate_synthetic_data(1024)

# Define the CNTK model function. The model function maps input data to
# predictions (here: 2-dimensional inputs --> 2 scores).
# This simple logistic-regression model just uses a linear transform.
data = cntk.input_variable(input_dim)
W = cntk.Parameter((input_dim, num_classes),
                   init=cntk.glorot_uniform(),
                   name='W')
b = cntk.Parameter((num_classes, ), init=0, name='b')
model = cntk.times(data, W) + b

# Define the CNTK criterion function. A criterion function maps
# (input vectors, labels) to a loss function and an optional additional
# metric. The loss function is used to train the model parameters.
# We use cross entropy as a loss function.
label_one_hot = cntk.input_variable(num_classes, is_sparse=True)
loss = cntk.cross_entropy_with_softmax(
    model,
    label_one_hot)  # this applies softmax to model's output under the hood
metric = cntk.classification_error(model, label_one_hot)
criterion = cntk.combine(
    [loss, metric])  # criterion is a tuple-valued function (loss, metric)
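To make the comments above concrete, a hedged training sketch for this criterion with a plain SGD learner (the learning rate and progress frequency are assumptions):

learner = cntk.sgd(model.parameters, cntk.learning_parameter_schedule(0.1))
progress_writer = cntk.logging.ProgressPrinter(50)
criterion.train((X_train, Y_train), parameter_learners=[learner],
                callbacks=[progress_writer])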
Example #28
def test_parameter_value(value):
    c = C.Parameter(init=value)
    assert np.allclose(c.value, value)
Example #29
def main():
    print("version", C.__version__)
    bs = 1
    n_chans = 1

    sigma_s = 16
    sigma_r = 12

    # 4x4x1024x1024
    # 4x12x64x64

    sz = 256
    # sz = 1024
    small_sz = sz // sigma_s

    yy, xx = np.meshgrid(np.arange(0, sz), np.arange(0, sz))
    cc, bb = np.meshgrid(np.arange(0, n_chans), np.arange(0, bs))

    xx = np.expand_dims(xx, 0)
    xx = np.expand_dims(xx, 0)
    yy = np.expand_dims(yy, 0)
    yy = np.expand_dims(yy, 0)

    bb = np.expand_dims(bb, 2)
    bb = np.expand_dims(bb, 3)
    cc = np.expand_dims(cc, 2)
    cc = np.expand_dims(cc, 3)

    # Compute graph
    grid = C.Parameter([bs, n_chans, sigma_r, small_sz, small_sz], )
    # grid = C.input_variable(
    #     [bs, n_chans, sigma_r, small_sz, small_sz],
    #     dynamic_axes=[], needs_gradient=True)
    guide = C.input_variable([bs, sz, sz],
                             dynamic_axes=[],
                             needs_gradient=True)
    guide_non_diff = C.input_variable([bs, sz, sz], dynamic_axes=[])

    # Coordinates
    xx = C.Constant(xx, xx.shape)
    yy = C.Constant(yy, yy.shape)
    cc = C.Constant(cc, cc.shape)
    bb = C.Constant(bb, bb.shape)

    gx_d, gy_d, gz_d, fx_d, fy_d, fz_d, _, _, _ = grid_coord(
        guide, xx, yy, sz, small_sz, sigma_r, bs)

    # Trilerp weights
    wx = (gx_d - 0.5 - fx_d)
    wy = (gy_d - 0.5 - fy_d)
    wz = C.abs(gz_d - 0.5 - fz_d)

    # Enclosing cell
    gx, gy, gz, fx, fy, fz, cx, cy, cz = grid_coord(guide_non_diff, xx, yy, sz,
                                                    small_sz, sigma_r, bs)

    output_components = []
    for ix, x in enumerate([fx, cx]):
        wx_ = (1 - wx) if ix == 0 else wx
        for iy, y in enumerate([fy, cy]):
            wy_ = (1 - wy) if iy == 0 else wy
            for iz, z in enumerate([fz, cz]):
                wz_ = (1 - wz) if iz == 0 else wz
                linear_idx = x + small_sz * (y + small_sz *
                                             (z + sigma_r *
                                              (cc + n_chans * bb)))

                # Flatten data for gather op
                flat_grid = C.reshape(
                    grid, [bs * small_sz * small_sz * sigma_r * n_chans])
                flat_linear_idx = C.reshape(linear_idx,
                                            [bs * n_chans * sz * sz])

                # Slice
                interp = C.gather(flat_grid, flat_linear_idx)
                interp_fsz = C.reshape(interp, [bs, n_chans, sz, sz])
                output_components.append(interp_fsz * wz_ * wx_ * wy_)

    out = sum(output_components)
    loss = C.squared_error(out, guide)

    # svg = C.logging.graph.plot(out, "/output/graph.svg")

    grid_data = np.random.uniform(size=(bs, n_chans, sigma_r, small_sz,
                                        small_sz)).astype(np.float32)

    # guide_data = np.random.uniform(
    #     size=(bs, sz, sz)).astype(np.float32)
    guide_data = skio.imread("/data/rgb.png").mean(2)[:sz, :sz].astype(
        np.float32)
    guide_data = np.expand_dims(guide_data, 0) / 255.0

    inputs = {guide: guide_data, guide_non_diff: guide_data}
Example #30
def BilateralSlice(sz, i_chans, o_chans, grid_sz=64, sigma_r=8):
  gsize = [(i_chans+1)*o_chans, sigma_r, grid_sz, grid_sz]
  grid = C.Parameter(gsize, 
                     name="grid", init=np.random.uniform(size=gsize))
  guide_scale = C.Parameter((1, ), 
                     name="guide_scale", init=np.ones((1, )))
  grid_scale = C.Parameter((1, ), 
                     name="grid_scale", init=np.ones((1, )))
  im_scale = C.Parameter((1, ), 
                     name="im_scale", init=np.ones((1, )))


  yy, xx = np.meshgrid(np.arange(0, sz), np.arange(0, sz))
  xx = np.expand_dims(xx, 0)
  yy = np.expand_dims(yy, 0)
  cc = np.arange(0, i_chans+1)
  cc = np.expand_dims(cc, 1)
  cc = np.expand_dims(cc, 2)
  xx = C.Constant(xx, xx.shape)
  yy = C.Constant(yy, yy.shape)
  cc = C.Constant(cc, cc.shape)


  @C.functions.BlockFunction("BilateralSlice", "bilateral_slice")
  def bilateral_slice(im, guide, guide_no_grad):
    # Flatten data for gather op
    flat_grid = grid_scale*C.reshape(grid, [grid_sz*grid_sz*sigma_r*o_chans*(i_chans+1)])
    # flat_grid_u = C.unpack_batch(flat_grid)

    # Make sure we do sth that requires the gradient w.r.t guide
    scaled_guide = guide_scale*guide  
    gx_d, gy_d, gz_d, fx_d, fy_d, fz_d, _, _, _ = grid_coord(
        scaled_guide, xx, yy, sz, grid_sz, sigma_r)
    wx = C.abs(gx_d - 0.5 - fx_d)
    wy = C.abs(gy_d - 0.5 - fy_d)
    wz = C.abs(gz_d - 0.5 - fz_d)

    # Enclosing cell
    gx, gy, gz, fx, fy, fz, cx, cy, cz = grid_coord(
        guide_no_grad, xx, yy, sz, grid_sz, sigma_r)

    out_chans = []
    for chan in range(o_chans):
      output_components = []
      for ix, x in enumerate([fx, cx]):
        wx_ = (1-wx) if ix == 0 else wx
        for iy, y in enumerate([fy, cy]):
          wy_ = (1-wy) if iy == 0 else wy
          for iz, z in enumerate([fz, cz]):
            wz_ = (1-wz) if iz == 0 else wz

            linear_idx = x + grid_sz*(y + grid_sz*(z + sigma_r*(cc + chan*(i_chans+1))))
            flat_linear_idx = C.reshape(linear_idx, [(i_chans+1)*sz*sz])
            # Slice
            interp = C.gather(flat_grid, flat_linear_idx)
            interp_fsz = C.reshape(interp, [i_chans+1, sz, sz])*wx_*wy_*wz_
            output_components.append(interp_fsz)

      out_coeffs = sum(output_components)
      out_chan = C.reduce_sum(out_coeffs[:i_chans]*(im_scale*im) + out_coeffs[-1], 0)
      out_chans.append(out_chan)
    out = C.splice(*out_chans, axis=0)

    return out
  
  return bilateral_slice