def test_trainer_with_some_params_not_learned():
    input_dim = 2
    proj_dim = 2
    x = C.input_variable(shape=(input_dim,))
    W = C.parameter(shape=(input_dim, proj_dim), init=C.glorot_uniform())
    B = C.parameter(shape=(proj_dim,), init=C.glorot_uniform())
    t = C.times(x, W)
    z = t + B

    W_orig_value = W.value
    B_orig_value = B.value

    labels = C.input_variable(shape=(proj_dim,))
    ce = C.cross_entropy_with_softmax(z, labels)
    pe = C.classification_error(z, labels)

    lr_per_sample = C.learning_parameter_schedule(0.1, minibatch_size=1)
    trainer = C.Trainer(z, (ce, pe), C.sgd([W], lr_per_sample))

    x_value = [[1, 1],[2, 2]]
    label_value = [[0, 1], [1, 0]]
    arguments = {x: x_value, labels: label_value}

    num_iters = 3
    for i in range(num_iters):
        trainer.train_minibatch(arguments)

        assert np.array_equal(B.value, B_orig_value)
        assert not np.array_equal(W.value, W_orig_value)
        W_orig_value = W.value

    trainer.test_minibatch(arguments)
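
A detail worth isolating from the test above: a CNTK learner only updates the parameters it is explicitly given, so passing [W] to C.sgd leaves B frozen. A minimal standalone sketch of the same idiom, with all dimensions assumed:

import numpy as np
import cntk as C

x = C.input_variable(2)
W = C.parameter((2, 2), init=C.glorot_uniform())
B = C.parameter((2,), init=0.0)
z = C.times(x, W) + B
loss = C.reduce_sum(C.square(z))
# only W is handed to the learner, so B never changes
learner = C.sgd([W], C.learning_parameter_schedule(0.1))
trainer = C.Trainer(z, loss, [learner])
trainer.train_minibatch({x: np.random.rand(4, 2).astype(np.float32)})
assert np.array_equal(B.value, np.zeros(2, dtype=np.float32))
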
def test_convert_optimized_rnnstack(num_layers, bidirectional, recurrent_op, device_id):
    if device_id == -1:
        pytest.skip('only runs on GPU')

    input_dim = 5
    hidden_dim = 3
    data = [np.random.random((20,input_dim)).astype(np.float32), np.random.random((10,input_dim)).astype(np.float32), np.random.random((40,input_dim)).astype(np.float32)]
    input_var = C.sequence.input_variable(shape=(input_dim,))
    
    W1 = C.parameter((-1,1), init = C.glorot_uniform())
    W2 = C.parameter((-1,1), init = C.glorot_uniform())
    cudnn_rnn1 = C.optimized_rnnstack(input_var, W1, hidden_dim, num_layers=num_layers, bidirectional=bidirectional, recurrent_op=recurrent_op)
    dense1 = C.layers.Dense(hidden_dim)(cudnn_rnn1)
    cudnn_rnn2 = C.optimized_rnnstack(dense1, W2, hidden_dim, num_layers=num_layers, bidirectional=bidirectional, recurrent_op=recurrent_op)
    dense2 = C.layers.Dense(hidden_dim)(cudnn_rnn2)
    cudnn_rnn3 = C.optimized_rnnstack(dense2, W2, hidden_dim, num_layers=num_layers, bidirectional=bidirectional, recurrent_op=recurrent_op) # test shared parameter W2
    
    def blocked(d):
        blocked_W = C.parameter((-1,d), init = C.glorot_uniform())
        @C.layers.BlockFunction('', '')
        def func(x):
            return C.optimized_rnnstack(x, blocked_W, d, 1, recurrent_op='lstm')
        return func
    
    cudnn_model = C.layers.Sequential([blocked(hidden_dim), blocked(2*hidden_dim), blocked(3*hidden_dim)])(cudnn_rnn3)
    cudnn_out = cudnn_model.eval({input_var:data})

    model = C.misc.convert_optimized_rnnstack(cudnn_model)

    # make sure original cudnn model is intact
    cudnn_out2 = cudnn_model.eval({input_var:data})
    assert all(np.allclose(cudnn_out[i], cudnn_out2[i]) for i in range(len(cudnn_out)))

    model_out = model.eval({model.arguments[0]:data})
    assert all(np.allclose(cudnn_out[i], model_out[i]) for i in range(len(cudnn_out)))
Example #3
def ffnet(learner, trainer=None):
    inputs = 5
    outputs = 3
    layers = 2
    hidden_dimension = 3

    if trainer is None:
        # input variables denoting the features and label data
        features = C.input_variable((inputs), np.float32)
        label = C.input_variable((outputs), np.float32)

        # Instantiate the feedforward classification model
        my_model = Sequential([
            Dense(hidden_dimension,
                  activation=C.sigmoid,
                  init=C.glorot_uniform(seed=98052)),
            Dense(outputs, init=C.glorot_uniform(seed=98052))
        ])
        z = my_model(features)

        ce = C.cross_entropy_with_softmax(z, label)
        pe = C.classification_error(z, label)

        # Instantiate the trainer object to drive the model training
        progress_printer = ProgressPrinter(0)
        trainer = C.Trainer(z, (ce, pe), [learner(z.parameters)],
                            [progress_printer])
    else:
        features = trainer.loss_function.arguments[0]
        label = trainer.loss_function.arguments[1]

    # Get minibatches of training data and perform model training
    minibatch_size = 25
    num_minibatches_to_train = 100

    aggregate_loss = 0.0
    for i in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(minibatch_size, inputs,
                                                      outputs)
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        trainer.train_minibatch({features: train_features, label: labels})
        sample_count = trainer.previous_minibatch_sample_count
        aggregate_loss += trainer.previous_minibatch_loss_average * sample_count

    last_avg_error = aggregate_loss / trainer.total_number_of_samples_seen

    test_features, test_labels = generate_random_data(minibatch_size, inputs,
                                                      outputs)
    avg_error = trainer.test_minibatch({
        features: test_features,
        label: test_labels
    })
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return last_avg_error, avg_error, trainer
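
Both ffnet variants in this listing call a generate_random_data helper that is not shown. A plausible minimal version matching the call signature (an assumption, not the repository's actual helper):

import numpy as np

def generate_random_data(sample_size, feature_dim, num_classes):
    # random class labels, one-hot encoded
    labels = np.eye(num_classes, dtype=np.float32)[
        np.random.randint(0, num_classes, sample_size)]
    # class-dependent Gaussian features so the task is learnable
    features = np.random.randn(sample_size, feature_dim) \
        + 3 * (np.argmax(labels, axis=1).reshape(-1, 1) + 1)
    return features.astype(np.float32), labels
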
Example #4
    def attention_layer(self, context, query, layer):

        q_processed = C.placeholder(shape=(2*self.hidden_dim,))
        p_processed = C.placeholder(shape=(2*self.hidden_dim,))

        qvw, qvw_mask = C.sequence.unpack(q_processed, padding_value=0).outputs

        wq = C.parameter(shape=(2*self.hidden_dim, 2*self.hidden_dim), init=C.glorot_uniform())
        wp = C.parameter(shape=(2*self.hidden_dim, 2*self.hidden_dim), init=C.glorot_uniform())
        wg = C.parameter(shape=(8*self.hidden_dim, 8*self.hidden_dim), init=C.glorot_uniform())
        v = C.parameter(shape=(2*self.hidden_dim, 1), init=C.glorot_uniform())

        # seq[tensor[2d]] p_len x 2d
        wpt = C.reshape(C.times(p_processed, wp), (-1, 2*self.hidden_dim))

        # q_len x 2d
        wqt = C.reshape(C.times(qvw, wq), (-1, 2*self.hidden_dim))
        
        # seq[tensor[q_len]]
        S = C.reshape(C.times(C.tanh(C.sequence.broadcast_as(wqt, p_processed) + wpt), v), (-1))

        qvw_mask_expanded = C.sequence.broadcast_as(qvw_mask, p_processed)

        # seq[tensor[q_len]]
        S = C.element_select(qvw_mask_expanded, S, C.constant(-1e+30))
        
        # seq[tensor[q_len]]
        A = C.softmax(S, axis=0)

        # seq[tensor[2d]]
        swap_qvw = C.swapaxes(qvw)
        cq = C.reshape(C.reduce_sum(A * C.sequence.broadcast_as(swap_qvw, A), axis=1), (-1))

        # seq[tensor[4d]]
        uc_concat = C.splice(p_processed, cq, p_processed * cq, cq * cq)
        
        # seq[tensor[4d]]
        gt = C.tanh(C.times(uc_concat, wg))
        
        # seq[tensor[4d]]
        uc_concat_star = gt * uc_concat
 
        # seq[tensor[4d]]
        vp = C.layers.Sequential([
            C.layers.Dropout(self.dropout),
            OptimizedRnnStack(self.hidden_dim, bidirectional=True, 
                use_cudnn=self.use_cudnn, name=layer+'_attention_rnn')])(uc_concat_star)
        
        return C.as_block(
            vp,
            [(p_processed, context), (q_processed, query)],
            'attention_layer',
            'attention_layer')
Example #5
def MultiHeadAttentionBlock(num_heads, model_dim, obey_sequence_order: bool = None, max_seq_len: int = None,
                            key_init=default_override_or(C.glorot_uniform()), key_init_bias=default_override_or(0),
                            query_init=default_override_or(C.glorot_uniform()), query_init_bias=default_override_or(0),
                            value_init=default_override_or(C.glorot_uniform()), value_init_bias=default_override_or(0),
                            init=default_override_or(C.glorot_uniform()), init_bias=default_override_or(0),
                            initial_scale=1, initial_bias=0, name=''):
    """ Multi head attention block as described in "Attention is all you need", https://arxiv.org/abs/1706.03762

    Multi-head attention block comes with a residual connection and a layer norm.

    Example:
        a = C.sequence.input_variable(10)
        b = MultiHeadAttentionBlock(2, 10)(a, a, a)

        assert b.shape == (10, )

    Arguments:
        num_heads (int): number of attention heads
        model_dim (int): number of hidden dim in final output of multi-head attention
        obey_sequence_order: do not let attention peek into future values
        max_seq_len: max sequence length possible, used to ensure that sequence order is obeyed
        key_init (scalar or NumPy array or :mod:`cntk.initializer`, defaults to :func:`~cntk.initializer.glorot_uniform` ): initial value of weights `W`
        key_init_bias (scalar or NumPy array or :mod:`cntk.initializer`, defaults to 0): initial value of weights `b`
        query_init (scalar or NumPy array or :mod:`cntk.initializer`, defaults to :func:`~cntk.initializer.glorot_uniform` ): initial value of weights `W`
        query_init_bias (scalar or NumPy array or :mod:`cntk.initializer`, defaults to 0): initial value of weights `b`
        value_init (scalar or NumPy array or :mod:`cntk.initializer`, defaults to :func:`~cntk.initializer.glorot_uniform` ): initial value of weights `W`
        value_init_bias (scalar or NumPy array or :mod:`cntk.initializer`, defaults to 0): initial value of weights `b`
        init (scalar or NumPy array or :mod:`cntk.initializer`, defaults to :func:`~cntk.initializer.glorot_uniform` ): initial value of weights `W`
        init_bias (scalar or NumPy array or :mod:`cntk.initializer`, defaults to 0): initial value of weights `b`
        initial_scale (float, default 1): initial value for the ``scale`` parameter aka gamma
        initial_bias (float, default 0): initial value for the ``bias`` parameter aka beta

    Returns:
        :class:`~cntk.ops.functions.Function`:

    """
    attention_layer = MultiHeadAttention(num_heads, model_dim, obey_sequence_order, max_seq_len,
                                         key_init=key_init, key_init_bias=key_init_bias,
                                         query_init=query_init, query_init_bias=query_init_bias,
                                         value_init=value_init, value_init_bias=value_init_bias,
                                         init=init, init_bias=init_bias, name='MultiheadAttention')

    layernorm = LayerNormalization(initial_scale=initial_scale, initial_bias=initial_bias, name='LayerNorm')

    @C.Function
    def inner(query, key, value):
        attended = attention_layer(query, key, value)
        skip_connect_attended = attended + query
        normed_skip_connect_attended = layernorm(skip_connect_attended)
        return normed_skip_connect_attended

    return _inject_name(inner, name)
Example #6
def linear_layer(input_var, output_dim):
    input_dim = input_var.shape[0]
    # Introduce model parameters
    weight_param = C.parameter(shape=(output_dim, input_dim), name="weights", init=C.glorot_uniform())
    bias_param = C.parameter(shape=(output_dim, 1), name="biases", init=C.glorot_uniform())
    # Reshape to facilitate matrix multiplication
    input_reshaped = C.reshape(input_var, (input_dim, 1))
    # Weighted sums
    part1 = C.times(weight_param, input_reshaped)
    # Add biases
    part2 = part1 + bias_param
    # Return 1-D representation
    return C.reshape(part2, (output_dim))
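
A hypothetical use of linear_layer above, projecting a 4-dimensional input down to 2 outputs (assuming the usual import cntk as C / import numpy as np):

x = C.input_variable((4,))
y = linear_layer(x, 2)
print(y.eval({x: np.random.rand(1, 4).astype(np.float32)}))  # shape (1, 2)
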
Example #7
def test_convert_optimized_rnnstack(num_layers, bidirectional, recurrent_op,
                                    device_id):
    if device_id == -1:
        pytest.skip('only runs on GPU')

    input_dim = 5
    hidden_dim = 3
    data = [
        np.random.random((20, input_dim)).astype(np.float32),
        np.random.random((10, input_dim)).astype(np.float32),
        np.random.random((40, input_dim)).astype(np.float32)
    ]
    input_var = C.sequence.input_variable(shape=(input_dim, ))

    W1 = C.parameter((-1, 1), init=C.glorot_uniform())
    W2 = C.parameter((-1, 1), init=C.glorot_uniform())
    cudnn_rnn1 = C.optimized_rnnstack(input_var,
                                      W1,
                                      hidden_dim,
                                      num_layers=num_layers,
                                      bidirectional=bidirectional,
                                      recurrent_op=recurrent_op)
    dense1 = C.layers.Dense(hidden_dim)(cudnn_rnn1)
    cudnn_rnn2 = C.optimized_rnnstack(dense1,
                                      W2,
                                      hidden_dim,
                                      num_layers=num_layers,
                                      bidirectional=bidirectional,
                                      recurrent_op=recurrent_op)
    dense2 = C.layers.Dense(hidden_dim)(cudnn_rnn2)
    cudnn_model = C.optimized_rnnstack(
        dense2,
        W2,
        hidden_dim,
        num_layers=num_layers,
        bidirectional=bidirectional,
        recurrent_op=recurrent_op)  # test shared parameter W2
    cudnn_out = cudnn_model.eval({input_var: data})

    model = C.utils.convert_optimized_rnnstack(cudnn_model)

    # make sure original cudnn model is intact
    cudnn_out2 = cudnn_model.eval({input_var: data})
    assert all(
        np.allclose(cudnn_out[i], cudnn_out2[i])
        for i in range(len(cudnn_out)))

    model_out = model.eval({model.arguments[0]: data})
    assert all(
        np.allclose(cudnn_out[i], model_out[i]) for i in range(len(cudnn_out)))
Example #8
    def attention_layer(self, context, query):
        q_processed = C.placeholder(shape=(2 * self.hidden_dim, ))
        c_processed = C.placeholder(shape=(2 * self.hidden_dim, ))

        #convert query's sequence axis to static
        qvw, qvw_mask = C.sequence.unpack(q_processed, padding_value=0).outputs

        # This part deserves some explanation
        # It is the attention layer
        # In the paper they use a 6 * dim dimensional vector
        # here we split it in three parts because the different parts
        # participate in very different operations
        # so W * [h; u; h.* u] becomes w1 * h + w2 * u + w3 * (h.*u)
        ws1 = C.parameter(shape=(2 * self.hidden_dim, 1),
                          init=C.glorot_uniform())
        ws2 = C.parameter(shape=(2 * self.hidden_dim, 1),
                          init=C.glorot_uniform())
        ws3 = C.parameter(shape=(1, 2 * self.hidden_dim),
                          init=C.glorot_uniform())
        att_bias = C.parameter(shape=(), init=0)

        wh = C.times(c_processed, ws1)
        wu = C.reshape(C.times(qvw, ws2), (-1, ))
        whu = C.reshape(
            C.reduce_sum(c_processed *
                         C.sequence.broadcast_as(qvw * ws3, c_processed),
                         axis=1), (-1, ))
        S = wh + whu + C.sequence.broadcast_as(wu, c_processed) + att_bias
        # mask out values outside of Query, and fill in gaps with -1e+30 as neutral value for both reduce_log_sum_exp and reduce_max
        qvw_mask_expanded = C.sequence.broadcast_as(qvw_mask, c_processed)
        S = C.element_select(qvw_mask_expanded, S, C.constant(-1e+30))
        q_attn = C.reshape(C.softmax(S), (-1, 1))
        #q_attn = print_node(q_attn)
        c2q = C.reshape(
            C.reduce_sum(C.sequence.broadcast_as(qvw, q_attn) * q_attn,
                         axis=0), (-1))

        max_col = C.reduce_max(S)
        c_attn = C.sequence.softmax(max_col)

        htilde = C.sequence.reduce_sum(c_processed * c_attn)
        q2c = C.sequence.broadcast_as(htilde, c_processed)
        q2c_out = c_processed * q2c

        att_context = C.splice(c_processed, c2q, c_processed * c2q, q2c_out)

        return C.as_block(att_context, [(c_processed, context),
                                        (q_processed, query)],
                          'attention_layer', 'attention_layer')
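
The weight-splitting trick described in the comments at the top of this layer is easy to check with plain NumPy. An illustrative sketch (not from the source) verifying that one 6d-dimensional W applied to [h; u; h∘u] equals the three split products:

import numpy as np

d = 4                                        # stands in for 2 * hidden_dim
h, u = np.random.rand(d), np.random.rand(d)
W = np.random.rand(3 * d)                    # single row vector -> scalar score
w1, w2, w3 = W[:d], W[d:2 * d], W[2 * d:]
lhs = W @ np.concatenate([h, u, h * u])      # W * [h; u; h.*u]
rhs = w1 @ h + w2 @ u + w3 @ (h * u)         # w1*h + w2*u + w3*(h.*u)
assert np.isclose(lhs, rhs)
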
Example #9
def LinearAttentionModel(hidden_dim: int, model_dim: int,
                         key_init=default_override_or(C.glorot_uniform()), key_init_bias=default_override_or(0),
                         query_init=default_override_or(C.glorot_uniform()), query_init_bias=default_override_or(0),
                         value_init=default_override_or(C.glorot_uniform()), value_init_bias=default_override_or(0),
                         name=''):
    """ Convenience wrapper in the style of cntk.layers.AttentionModel """
    attention = LinearAttention(hidden_dim=hidden_dim, model_dim=model_dim,
                                key_init=key_init, key_init_bias=key_init_bias,
                                query_init=query_init, query_init_bias=query_init_bias,
                                value_init=value_init, value_init_bias=value_init_bias, name=name)

    def model(encoder_hidden_state, decoder_hidden_state):
        return attention(decoder_hidden_state, encoder_hidden_state, encoder_hidden_state)

    return model
Example #10
def ffnet(optimizer, num_minibatches_to_train, learning_rate_func, lr_args,
          learner_kwargs):
    inputs = 2
    outputs = 2
    hidden_dimension = 50

    # input variables denoting the features and label data
    features = C.input_variable((inputs), np.float32)
    label = C.input_variable((outputs), np.float32)

    # Instantiate the feedforward classification model
    my_model = Sequential([
        Dense(hidden_dimension,
              activation=C.sigmoid,
              init=C.glorot_uniform(seed=SEED)),
        Dense(outputs, init=C.glorot_uniform(seed=SEED))
    ])
    z = my_model(features)

    ce = C.cross_entropy_with_softmax(z, label)
    pe = C.classification_error(z, label)

    # Instantiate the trainer object to drive the model training
    lr = learning_rate_func(0.125, *lr_args)
    progress_printer = ProgressPrinter(0)
    learner = optimizer(z.parameters, lr) if optimizer != sgd else sgd(
        z.parameters, lr, **learner_kwargs)

    trainer = C.Trainer(z, (ce, pe), [learner], progress_printer)

    # Get minibatches of training data and perform model training
    minibatch_size = 25

    for i in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(minibatch_size, inputs,
                                                      outputs)
        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        trainer.train_minibatch({features: train_features, label: labels})

    test_features, test_labels = generate_random_data(minibatch_size, inputs,
                                                      outputs)
    avg_error = trainer.test_minibatch({
        features: test_features,
        label: test_labels
    })
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return z.parameters
Example #11
def test_output_subset_evaluation(device_id):
    try:
        gpu_device = C.gpu(0)
    except ValueError:
        pytest.skip('Test only runs when GPU available')

    device = cntk_device(device_id)
    x1 = C.input_variable(shape=())
    op1 = C.constant(value=1, shape=(1), device=device) + (C.constant(value=1, shape=(1), device=device) + x1)

    x2 = C.input_variable(shape=(1))

    # Deliberately locate the parameter on a different device
    # instead of the actual compute target device, so that
    # if we try to use this parameter, it results in an error
    if (device.type() == 0):
        parameter_device = gpu_device
    else:
        parameter_device = C.cpu()
    p = C.parameter(shape=(1), init=C.glorot_uniform(), device=parameter_device)
    op2 = (x2 - C.constant(value=10, shape=(1), device=device)) - p

    op = C.combine([op1, op2])

    _, result = op.forward({x1 : np.asarray([1, 2, 3])}, [op1], device=device)
    assert np.array_equal(result[op1], np.asarray([[3], [4], [5]]))
Example #12
def _create_convolution_model():

    with C.layers.default_options(init=C.glorot_uniform(), activation=C.relu):
        h = feature_var
        h = C.layers.Convolution2D(filter_shape=(5, 5),
                                   num_filters=8,
                                   strides=(2, 2),
                                   pad=True,
                                   name='first_convo')(h)

        h = C.layers.Convolution2D(filter_shape=(5, 5),
                                   num_filters=16,
                                   strides=(2, 2),
                                   pad=True,
                                   name='second_convo')(h)

        h = C.layers.Convolution2D(filter_shape=(5, 5),
                                   num_filters=16,
                                   strides=(1, 1),
                                   pad=True,
                                   name='thrid_convo')(h)

        h = C.layers.Convolution2D(filter_shape=(5, 5),
                                   num_filters=16,
                                   strides=(1, 1),
                                   pad=True,
                                   name='fourth_convo')(h)

        r = C.layers.Dense(num_classes, activation=None, name='classify')(h)
    return r
Example #13
def create_model(features, num_hidden_layers, hidden_layer_dim):
    with C.layers.default_options(init=C.glorot_uniform(), activation=C.sigmoid):
        h = features
        for _ in range(num_hidden_layers):
            h = C.layers.Dense(hidden_layer_dim)(h)
        last_layer = C.layers.Dense(num_classes, activation = None)
        return last_layer(h)
def BinaryConvolution(operand,
                      filter_shape,
                      num_filters=1,
                      channels = 1,
                      init=C.glorot_uniform(),
                      pad=False,
                      strides=1,
                      bias=True,
                      init_bias=0,
                      op_name='BinaryConvolution', name=''):
    """ arguments:
            operand: tensor to convolve
            filter_shape: tuple indicating filter size
            num_filters: number of filters to use 
            channels: number of incoming channels
            init: type of initialization to use for weights
    """
    kernel_shape = (num_filters, channels) + filter_shape
    W = C.parameter(shape=kernel_shape, init=init, name="filter")

    binary_convolve_operand_p = C.placeholder(operand.shape, operand.dynamic_axes, name="operand")
    binary_convolve = C.convolution(CustomMultibit(W, 1), CustomMultibit(binary_convolve_operand_p, 1), auto_padding=[False, pad, pad], strides=[strides])
    r = C.as_block(binary_convolve, [(binary_convolve_operand_p, operand)], 'binary_convolve')

    bias_shape = (num_filters, 1, 1)
    b = C.parameter(shape=bias_shape, init=init_bias, name="bias")
    r = r + b

    # apply learnable param relu
    P = C.parameter(shape=r.shape, init=init, name="prelu")
    r = C.param_relu(P, r)
    return r
Example #15
def test_rnn(device_id):
    if device_id == -1:
        pytest.skip('Test only runs on GPU')

    batch_size = 8
    sequence_len = 100
    vocab_dim = 20
    embed_dim = 10
    hidden_dim = 7
    input = C.cast(C.sequence.input_variable(()), np.float16)
    with C.default_options(dtype=np.float16):
        embed = C.layers.Embedding(embed_dim)(C.one_hot(input,
                                                        num_classes=vocab_dim,
                                                        sparse_output=False))
        z = C.layers.Recurrence(C.layers.LSTM(hidden_dim))(embed)

    feed = np.floor(
        np.random.rand(batch_size, sequence_len).astype(np.float32) *
        (vocab_dim - 1))
    z.grad(feed, wrt=z.parameters)

    num_layers = 2
    W = C.parameter((C.InferredDimension, embed_dim),
                    init=C.glorot_uniform(),
                    dtype=np.float16)
    with C.default_options(dtype=np.float16):
        z = C.optimized_rnnstack(embed, W, hidden_dim, num_layers)

    feed = np.floor(
        np.random.rand(batch_size, sequence_len).astype(np.float32) *
        (vocab_dim - 1))
    z.grad(feed, wrt=z.parameters)
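
A detail worth isolating from test_rnn above: data enters the graph as float32, is cast exactly once at the boundary, and everything downstream is built under a float16 default. A minimal sketch of that boundary-cast pattern (layer sizes assumed; float16 compute generally requires a GPU):

import numpy as np
import cntk as C

x = C.cast(C.sequence.input_variable(()), np.float16)
with C.default_options(dtype=np.float16):
    h = C.layers.Embedding(4)(C.one_hot(x, num_classes=8, sparse_output=False))
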
def create_model_with_pooling(features):
    with C.layers.default_options(init=C.glorot_uniform(),
                                  activation=C.leaky_relu):
        h = features

        h = C.layers.Convolution2D(filter_shape=(5, 5),
                                   num_filters=8,
                                   strides=(1, 1),
                                   pad=True,
                                   name='first_conv')(h)
        h = C.layers.AveragePooling(filter_shape=(5, 5),
                                    strides=(2, 2),
                                    name='first_pool')(h)
        h = C.layers.Convolution2D(filter_shape=(5, 5),
                                   num_filters=16,
                                   strides=(1, 1),
                                   pad=True,
                                   name='second_conv')(h)
        h = C.layers.AveragePooling(filter_shape=(5, 5),
                                    strides=(2, 2),
                                    name='second_pool')(h)
        r = C.layers.Dense(num_output_classes,
                           activation=None,
                           name='classify')(h)
        return r
Example #18
def test_cntk_cudnn():
    try:
        import tensorflow
        has_tensorflow = True
    except ImportError:
        has_tensorflow = False

    if has_tensorflow:
        tf_baseline_lstm()
    else:
        cntk_baseline_lstm()

    import cntk as C
    import cntk.contrib.crosstalk.crosstalk_cntk as crct
    ci = crct.instance
        
    input_var = C.sequence.input_variable(shape=(in_dim))
    data = {input_var:data_cntk}
    ci.set_data(data)
    ci.set_workdir(workdir)

    W = C.parameter((-1,dim,), init=C.glorot_uniform())
    cudnn_fwbw = C.optimized_rnnstack(input_var, W, dim, 1, bidirectional=True, recurrent_op='lstm')
    ci.watch(cudnn_fwbw, 'cntk_birnn_cudnn', var_type=cstk.RnnAttr,
          attr=cstk.RnnAttr(bidirectional=True, op_type='lstm', input_dim=in_dim, hidden_dim=dim, forget_bias=0))
    ci.watch(cudnn_fwbw, 'cntk_birnn_cudnn_out')
    
    ci.assign('cntk_birnn_cudnn', load=True, load_name='cntk_birnn')
    assert ci.compare('cntk_birnn_cudnn_out', compare_name='cntk_birnn_out')

    ci.fetch('cntk_birnn_cudnn', save=True)
    ci.assign('cntk_birnn_cudnn', load=True)
    assert ci.compare('cntk_birnn_cudnn_out', compare_name='cntk_birnn_out')
    
    ci.reset()
Example #19
def test_nce_loss(classes, xdim, batch, expected_value, device_id, precision):
    dt = PRECISION_TO_TYPE[precision]

    from cntk.losses import nce_loss
    import scipy

    x = C.input_variable(xdim, needs_gradient=True)
    y = C.input_variable(classes, is_sparse=True)

    x0 = np.arange(batch * xdim, dtype=dt).reshape(
        (batch, xdim)) / (batch * xdim)
    data = np.ones(batch, dtype=dt)
    indices = list(range(10, 10 * batch + 1, 10))
    indptr = list(range(batch + 1))
    y0 = scipy.sparse.csr_matrix((data, indices, indptr),
                                 shape=(batch, classes))

    q = np.arange(classes, dtype=dt) + 1

    b = C.parameter((classes, 1), init=-np.log(classes))
    W = C.parameter((classes, C.InferredDimension),
                    init=C.glorot_uniform(seed=98052))

    loss = C.nce_loss(W, b, x, y, q, seed=98052)
    v = loss.grad({x: x0, y: y0}, wrt=loss.parameters, as_numpy=False)
    for key in v:
        assert v[
            key].is_sparse, "gradient of nce_loss with respect to %s is not sparse" % key
    losses = np.zeros((100, batch))
    for i in range(100):
        losses[i, :] = loss.eval({x: x0, y: y0})
    assert np.allclose(np.mean(losses, axis=0), AA(expected_value))
Example #20
	def create_model(self, features):
		with cntk.layers.default_options(init=cntk.glorot_uniform(), activation=cntk.relu):
			h = features
			
			h = cntk.layers.Convolution2D(filter_shape=(3,3), 
										num_filters=16, 
										strides=(1,1), 
										pad=True, name="first_conv")(h)
			h = cntk.layers.MaxPooling(filter_shape=(2,2), 
										strides=(2,2), name="first_max")(h)
			h = cntk.layers.Convolution2D(filter_shape=(3,3), 
										num_filters=32, 
										strides=(1,1), 
										pad=True, name="second_conv")(h)
			h = cntk.layers.MaxPooling(filter_shape=(2,2), 
										strides=(2,2), name="second_max")(h)
			h = cntk.layers.Convolution2D(filter_shape=(3,3), 
										num_filters=64, 
										strides=(1,1), 
										pad=True, name="third_conv")(h)
			h = cntk.layers.MaxPooling(filter_shape=(2,2), 
										strides=(2,2), name="third_max")(h)	
			h = cntk.layers.Dense(500, name="fc0")(h)
			r = cntk.layers.Dense(self.num_output_classes, activation = None, name="classify")(h)			
			return r
Example #22
    def embed(self):
        npglove = np.zeros((self.wg_dim, 1024 + 300), dtype=np.float32)
        hf = h5py.File(
            os.path.join(self.abs_path, '../data/elmo_embedding.bin'), 'r')

        with open(os.path.join(self.abs_path, '../data/glove.840B.300d.txt'),
                  encoding='utf-8') as f:
            for line in f:
                parts = line.split()
                word = parts[0].lower()
                if word in self.vocab:
                    try:
                        if len(parts) == 301:
                            npglove[self.vocab[word], :300] = np.asarray(
                                [float(p) for p in parts[-300:]])
                            npglove[self.vocab[word],
                                    300:] = np.average(hf[word][:], axis=0)
                    except:
                        npglove[self.vocab[word],
                                300:] = np.average(hf['<UNK>'][:], axis=0)

        glove = C.constant(npglove)
        nonglove = C.parameter(shape=(self.wn_dim, 1024 + 300),
                               init=C.glorot_uniform(),
                               name='TrainableE')

        def func(wg, wn):
            return C.times(wg, glove) + C.times(wn, nonglove)

        return func
Example #23
    def word_glove(self):
        # load glove
        if os.path.isfile('glove300.model'):
            print('[BUILD] load glove300.model')
            return C.load_model('glove300.model')
        npglove = np.zeros((self.wg_dim, self.word_emb_dim), dtype=np.float32)
        with open(os.path.join(self.abs_path, self.word_embed_file),
                  encoding='utf-8') as f:
            for line in f:
                parts = line.split()
                word = parts[0].lower()
                if self.vocab.get(word, self.wg_dim) < self.wg_dim:
                    npglove[self.vocab[word], :] = np.asarray(
                        [float(p) for p in parts[-300:]])
        glove = C.constant(npglove)
        nonglove = C.parameter(shape=(len(self.vocab) - self.wg_dim,
                                      self.word_emb_dim),
                               init=C.glorot_uniform(),
                               name='TrainableE')

        @C.Function
        def func(wg, wn):
            return C.times(wg, glove) + C.times(wn, nonglove)

        func.save('glove300.model')
        print('[BUILD] save glove300.model')
        return func
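
The pattern in word_glove above, reduced to toy dimensions: pretrained rows live in a frozen C.constant, out-of-vocabulary rows in a trainable C.parameter, and two one-hot inputs select from each. A sketch with assumed sizes:

import numpy as np
import cntk as C

wg_dim, wn_dim, emb_dim = 5, 3, 2                                       # known vocab, OOV, embedding
glove = C.constant(np.random.rand(wg_dim, emb_dim).astype(np.float32))  # frozen, never trained
nonglove = C.parameter((wn_dim, emb_dim), init=C.glorot_uniform())      # learned

@C.Function
def embed(wg, wn):
    return C.times(wg, glove) + C.times(wn, nonglove)
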
Example #24
def create_network(para, verbose=False):
    with cntk.layers.default_options(init=cntk.glorot_uniform(), activation=cntk.ops.relu):
        # To keep debugging fast, the structure is deliberately simple;
        # the layer sizes come from the `para` list.
        h = cntk.layers.Convolution2D(filter_shape=(5, 5), num_filters=para[0],
                                      strides=(1, 1), pad=True, name='C1')(network_input / 255.0)
        h = cntk.layers.MaxPooling(filter_shape=(5, 5), strides=(2, 2))(h)

        h = cntk.layers.Convolution2D(filter_shape=(5, 5), num_filters=para[1],
                                      strides=(1, 1), pad=True, name='C2')(h)
        h = cntk.layers.MaxPooling(filter_shape=(5, 5), strides=(2, 2))(h)

        h = cntk.layers.Convolution2D(filter_shape=(3, 3), num_filters=para[2],
                                      strides=(1, 1), pad=True, name='C3')(h)

        h = cntk.layers.Dense(para[3])(h)

        h = cntk.layers.Dropout(0.25)(h)

        z = cntk.layers.Dense(10, activation=None, name='R')(h)
    loss = cntk.cross_entropy_with_softmax(z, network_label)
    label_error = cntk.classification_error(z, network_label)
    lr_schedule = cntk.learning_rate_schedule(0.1, cntk.UnitType.minibatch)
    learner = cntk.momentum_sgd(z.parameters, lr_schedule, cntk.momentum_schedule(0.9))
    trainer = cntk.Trainer(z, (loss, label_error), [learner])
    if verbose:
        log = cntk.logging.ProgressPrinter(100)
    for _ in range(20000):
        data = train_reader.next_minibatch(100, input_map=mapping(train_reader))
        trainer.train_minibatch(data)
        if verbose:
            log.update_with_trainer(trainer)
    return trainer
Example #25
def _create_convolution_model():

    with C.layers.default_options(init=C.glorot_uniform(), activation=C.relu):
        h = feature_var
        # The first two layers have bias=False to test that the conversion
        # works both with and without bias in the Convolution.
        h = C.layers.Convolution2D(filter_shape=(5,5),
                                   num_filters=64,
                                   strides=(2,2),
                                   pad=True, bias=False, name='first_convo')(h)        

        h = C.layers.Convolution2D(filter_shape=(5,5),
                                   num_filters=64,
                                   strides=(2,2),
                                   pad=True, bias=False, name='second_convo')(h)

        h = C.layers.Convolution2D(filter_shape=(5,5),
                                   num_filters=64,
                                   strides=(1,1),
                                   pad=True, name='thrid_convo')(h)

        h = C.layers.Convolution2D(filter_shape=(5,5),
                                   num_filters=64,
                                   strides=(1,1),
                                   pad=True, name='fourth_convo')(h)

        r = C.layers.Dense(num_classes, activation=None, name='classify')(h)
    return r
Example #26
def test_data_type_inference():
    x_float = C.input_variable((1,), dtype = np.float64)
    param1 = C.parameter((C.InferredDimension, 1), init = C.glorot_uniform(), dtype = C.cntk_py.DataType_Unknown)
    assert (param1.get_data_type() == C.cntk_py.DataType_Unknown)

    x_times_param1 = C.times(x_float, param1)
    assert (param1.dtype == np.float64)
Example #28
def CreatRNN(cell_dim,
             activation,
             initial_state,
             direction,
             num_layers,
             init=C.default_override_or(C.glorot_uniform()),
             init_bias=C.default_override_or(0)):
    if direction == 'bidirectional':
        return C.layers.Sequential([
            C.layers.For(range(num_layers), lambda i: [
                (C.layers.Recurrence(C.layers.RNNStep(cell_dim,
                                                      activation=activation,
                                                      init=init,
                                                      init_bias=init_bias),
                                     initial_state=initial_state,
                                     return_full_state=False,
                                     go_backwards=False),
                 C.layers.Recurrence(C.layers.RNNStep(cell_dim,
                                                      activation=activation,
                                                      init=init,
                                                      init_bias=init_bias),
                                     initial_state=initial_state,
                                     return_full_state=False,
                                     go_backwards=True)),
                C.splice])])
    else:
        go_backward = direction != 'forward'
        return C.layers.Sequential([
            C.layers.For(range(num_layers), lambda i: [
                C.layers.Recurrence(C.layers.RNNStep(cell_dim,
                                                     activation=activation,
                                                     init=init,
                                                     init_bias=init_bias),
                                    initial_state=initial_state,
                                    return_full_state=False,
                                    go_backwards=go_backward)])])
def create_model(features):
    with cntk.layers.default_options(init = cntk.glorot_uniform(), activation = cntk.ops.relu):
        input = features
        for _ in range(num_hidden_layers):
            input = cntk.layers.Dense(hidden_layers_dim)(input)
        r = cntk.layers.Dense(num_output_classes, activation = None)(input)
        return r
Example #30
def OptimizedRnnStack(hidden_dim,
                      num_layers=1,
                      recurrent_op='gru',
                      bidirectional=False,
                      use_cudnn=True,
                      name=''):
    if use_cudnn:
        W = C.parameter(_INFERRED + (hidden_dim, ), init=C.glorot_uniform())

        def func(x):
            return C.optimized_rnnstack(x,
                                        W,
                                        hidden_dim,
                                        num_layers,
                                        bidirectional,
                                        recurrent_op=recurrent_op,
                                        name=name)

        return func
    else:

        def func(x):
            return C.splice(C.layers.Recurrence(C.layers.LSTM(hidden_dim))(x),
                            C.layers.Recurrence(C.layers.LSTM(hidden_dim),
                                                go_backwards=True)(x),
                            name=name)

        return func
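
Possible usage of the factory above, with shapes assumed. Note that in this variant the use_cudnn=False branch always emulates with a bidirectional LSTM pair and ignores recurrent_op; a GRU-based fallback appears later in this listing (Example #42):

import numpy as np
import cntk as C

x = C.sequence.input_variable((5,))
birnn = OptimizedRnnStack(3, bidirectional=True, use_cudnn=False)(x)
out = birnn.eval({x: [np.random.rand(7, 5).astype(np.float32)]})
print(out[0].shape)  # (7, 6): forward and backward outputs spliced
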
Example #31
def cntk_baseline_lstm():
    import cntk as C
    import cntk.contrib.crosstalk.crosstalk_cntk as crct
    ci = crct.instance
    input_var = C.sequence.input_variable(shape=(in_dim))
    fwbw = C.splice(
        C.layers.Recurrence(C.layers.LSTM(
            dim, init_bias=C.glorot_uniform()))(input_var),
        C.layers.Recurrence(C.layers.LSTM(dim), go_backwards=True)(input_var))
    ci.watch(fwbw,
             'birnn',
             var_type=cstk.RnnAttr,
             attr=cstk.RnnAttr(bidirectional=True,
                               op_type='lstm',
                               input_dim=in_dim,
                               hidden_dim=dim,
                               forget_bias=0))
    ci.watch(fwbw, 'birnn_out')

    data = {input_var: data_cntk}
    ci.set_data(data)
    ci.set_workdir(workdir)
    ci.fetch('birnn', save=True)
    ci.fetch('birnn_out', save=True)
    ci.reset()
Example #32
    def create_model(self):
        w = cntk.Parameter((self.number_features, self.number_labels),
                           init=cntk.glorot_uniform(),
                           name='W')
        b = cntk.Parameter((self.number_labels, ), init=0, name='b')

        self.model = cntk.times(self.input_transform, w) + b
Example #33
def _create_model(net_input, num_output_classes, num_hidden_layers, hidden_layers_dim):
    h = net_input
    with C.layers.default_options(init=C.glorot_uniform()):
        for i in range(num_hidden_layers):
            h = C.layers.Dense(hidden_layers_dim,
                               activation=C.relu)(h)
        return C.layers.Dense(num_output_classes, activation=None)(h)
Example #37
def create_model(features):
    with C.layers.default_options(init=C.glorot_uniform()):
        # We scale the input pixels to 0-1 range
        encode = C.layers.Dense(encoding_dim,
                                activation=C.relu)(features / 255.0)
        decode = C.layers.Dense(input_dim, activation=C.sigmoid)(encode)

    return decode
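
One plausible way (assumed, not shown in the source) to wire the autoencoder above into a trainer; input_dim and encoding_dim are the module-level constants create_model expects:

import numpy as np
import cntk as C

input_dim, encoding_dim = 784, 32
features = C.input_variable(input_dim)
model = create_model(features)
loss = C.squared_error(model, features / 255.0)   # reconstruct the scaled input
learner = C.sgd(model.parameters, C.learning_parameter_schedule(0.1))
trainer = C.Trainer(model, loss, [learner])
trainer.train_minibatch({features: (np.random.rand(16, input_dim) * 255).astype(np.float32)})
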
Example #40
def test_gather_2D_using_one_hot_and_times():
    i = C.sequence.input_variable((1,))
    indices = [[2, 0], [1]]
    sparse_one_hot = C.one_hot(i, num_classes=3, sparse_output=True)
    w = C.parameter((-1, 2, 3), init=C.glorot_uniform())
    t = C.times(sparse_one_hot, w, output_rank=2)
    result = t.eval({i : indices})
    w_value = w.value
    expected_result = [np.stack([np.expand_dims(np.asarray(w_value[idx]), axis=0) for idx in seq]) for seq in indices]
    assert np.array_equal(result[0], expected_result[0])
    assert np.array_equal(result[1], expected_result[1])
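
What this test exploits, in plain NumPy terms: a one-hot row e_k times w selects the slice w[k], so one_hot followed by times(..., output_rank=2) behaves as a gather over the first axis. An illustrative check (not from the source):

import numpy as np

w = np.random.rand(3, 2, 3).astype(np.float32)
idx = [2, 0, 1]
one_hot = np.eye(3, dtype=np.float32)[idx]       # (3, 3)
gathered = np.tensordot(one_hot, w, axes=1)      # contract the class axis -> (3, 2, 3)
assert np.allclose(gathered, w[idx])
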
Example #41
    def create_model(input):
        with C.layers.default_options(init=C.glorot_uniform(), activation=C.relu):
            model = C.layers.Sequential([
                C.layers.For(range(3), lambda i: [
                    C.layers.Convolution((5,5), [32,32,64][i], pad=True),
                    C.layers.MaxPooling((3,3), strides=(2,2))
                    ]),
                C.layers.Dense(64),
                C.layers.Dense(10, activation=None)
            ])

        return model(input)
Example #42
def OptimizedRnnStack(hidden_dim, num_layers=1, recurrent_op='gru', bidirectional=False, use_cudnn=True, name=''):
    if use_cudnn:
        W = C.parameter(_INFERRED + (hidden_dim,), init=C.glorot_uniform())
        def func(x):
            return C.optimized_rnnstack(x, W, hidden_dim, num_layers, bidirectional, recurrent_op=recurrent_op, name=name)
        return func
    else:
        def func(x):
            return C.splice(
                        C.layers.Recurrence(C.layers.GRU(hidden_dim))(x),
                        C.layers.Recurrence(C.layers.GRU(hidden_dim), go_backwards=True)(x),
                        name=name)
        return func
Example #43
def create_model(features):
    with C.layers.default_options(init=C.glorot_uniform(), activation=C.relu):
        h = features
        h = C.layers.Convolution2D(filter_shape=(util.KSIZE_CONV1, util.KSIZE_CONV1), num_filters=util.FILTERS_CONV1,strides=util.CONV1_STRIDE, pad=True, name='first_conv')(h)

        h = C.layers.MaxPooling(filter_shape=(util.POOL_SIZE1, util.POOL_SIZE1), name='first_max')(h)

        h = C.layers.Convolution2D(filter_shape=(util.KSIZE_CONV2, util.KSIZE_CONV2), num_filters=util.FILTERS_CONV2, strides=util.CONV2_STRIDE, pad=True, name='second_conv')(h)
        
        h = C.layers.MaxPooling(filter_shape=(util.POOL_SIZE2, util.POOL_SIZE2), name='second_max')(h)
        r = C.layers.Dense(num_output_classes, activation=None, name='classify')(h)

        return r
Example #44
def _create_convolution_model_with_skip_level_links():

    with C.layers.default_options(init=C.glorot_uniform(), activation=C.relu):
        h = feature_var
        # The first two layers have bias=False to test that the conversion
        # works both with and without bias in the Convolution.
        a = C.layers.Convolution2D(filter_shape=(5,5),
                                   num_filters=64,
                                   strides=(2,2),
                                   pad=True, bias=False, name='first_convo')(h)

        a = BatchNormalization(map_rank=1, 
                               normalization_time_constant=4096, 
                               use_cntk_engine=True, init_scale=1, 
                               disable_regularization=True)(a)
        
        b = C.layers.Convolution2D(filter_shape=(5,5),
                                   num_filters=64,
                                   strides=(2,2),
                                   pad=True, bias=False, name='second_convo')(h)

        b = BatchNormalization(map_rank=1, 
                               normalization_time_constant=4096, 
                               use_cntk_engine=True, init_scale=1, 
                               disable_regularization=True)(b)

        h = a + b

        h = C.layers.Convolution2D(filter_shape=(5,5),
                                   num_filters=64,
                                   strides=(1,1),
                                   pad=True, name='thrid_convo')(h)

        h = BatchNormalization(map_rank=1, 
                               normalization_time_constant=4096, 
                               use_cntk_engine=True, init_scale=1, 
                               disable_regularization=True)(h)

        h = C.layers.Convolution2D(filter_shape=(5,5),
                                   num_filters=64,
                                   strides=(1,1),
                                   pad=True, name='fourth_convo')(h)

        h = BatchNormalization(map_rank=1, 
                               normalization_time_constant=4096, 
                               use_cntk_engine=True, init_scale=1, 
                               disable_regularization=True)(h)

        r = C.layers.Dense(num_classes, activation=None, name='classify')(h)
    return r
Example #45
    def create_basic_model_with_batch_normalization(input, out_dims):
        with C.layers.default_options(activation=C.relu, init=C.glorot_uniform()):
            model = C.layers.Sequential([
                C.layers.For(range(3), lambda i: [
                    C.layers.Convolution((5,5), [image_width,image_height,64][i], pad=True),
                    C.layers.BatchNormalization(map_rank=1),
                    C.layers.MaxPooling((3,3), strides=(2,2))
                ]),
                C.layers.Dense(64),
                C.layers.BatchNormalization(map_rank=1),
                C.layers.Dense(out_dims, activation=None)
            ])

        return model(input)
Example #46
def cntk_baseline_conv2d():
    import cntk as C
    import cntk.contrib.crosstalk.crosstalk_cntk as crct
    ci = crct.instance
    input_var = C.input_variable(shape=sample_shape)
    input_reshaped = C.reshape(input_var, (1,)+sample_shape)
    conv_out = C.layers.Convolution2D(filter_shape, num_filters, init_bias=C.glorot_uniform())(input_reshaped)
    ci.watch(conv_out, 'conv2d', var_type=cstk.Conv2DAttr,
              attr=cstk.Conv2DAttr(filter_shape=filter_shape, num_filters=num_filters))
    ci.watch(conv_out, 'conv2d_out')
    data = {input_var:input_data}
    ci.set_data(data)
    ci.set_workdir(workdir)
    ci.fetch('conv2d', save=True)
    ci.fetch('conv2d_out', save=True)
    ci.reset()
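Both crosstalk baselines (conv2d above, lstm below) read their configuration from module scope. Plausible definitions that make them self-contained; every value here is an illustrative assumption:

import numpy as np

workdir = 'crosstalk_workdir'   # assumed scratch directory for fetch/assign files
sample_shape = (28, 28)         # assumed; reshaped to (1, 28, 28) before the convolution
filter_shape = (3, 3)
num_filters = 16
input_data = np.random.random((4,) + sample_shape).astype(np.float32)

in_dim, dim = 5, 3              # assumed input/hidden dims for the lstm baseline
data_cntk = [np.random.random((10, in_dim)).astype(np.float32)]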
Example #47
def cntk_baseline_lstm():
    import cntk as C
    import cntk.contrib.crosstalk as cstk
    import cntk.contrib.crosstalk.crosstalk_cntk as crct
    ci = crct.instance
    input_var = C.sequence.input_variable(shape=(in_dim,))
    fwd = C.layers.Recurrence(C.layers.LSTM(dim, init_bias=C.glorot_uniform()))(input_var)
    bwd = C.layers.Recurrence(C.layers.LSTM(dim), go_backwards=True)(input_var)
    fwbw = C.splice(fwd, bwd)
    ci.watch(fwbw, 'birnn', var_type=cstk.RnnAttr,
          attr=cstk.RnnAttr(bidirectional=True, op_type='lstm', input_dim=in_dim, hidden_dim=dim, forget_bias=0))
    ci.watch(fwbw, 'birnn_out')

    data = {input_var:data_cntk}
    ci.set_data(data)
    ci.set_workdir(workdir)
    ci.fetch('birnn', save=True)
    ci.fetch('birnn_out', save=True)
    ci.reset()
Example #48
def test_conv_cudnn_batch_size_change(device_id):
    if device_id == -1:
        pytest.skip('Test only runs on GPU')

    np.random.seed(0)
    input_shape = (1, 16, 100)
    input1 = C.sequence.input_variable(input_shape, needs_gradient=True, sequence_axis=C.Axis.new_unique_dynamic_axis('c'))
    input2 = C.sequence.input_variable(input_shape, needs_gradient=True, sequence_axis=C.Axis.new_unique_dynamic_axis('q'))
    conv = C.layers.Convolution2D((5,8), 100, activation=C.relu, init=C.glorot_uniform(), bias=True, init_bias=0)
    output = C.reduce_sum(conv(input1), axis=C.Axis.all_axes()) + C.reduce_sum(conv(input2), axis=C.Axis.all_axes())
    num_batches = 100 # change to greater value for a more thorough test
    batch_size = 1
    max_seq_len = [100, 10]
    for batch in range(num_batches):
        seq_lens = [[int(x*msl+1) for x in np.random.random((batch_size))] for msl in max_seq_len]
        output.grad({input1:[np.random.random((sl,) + input_shape).astype(np.float32) for sl in seq_lens[0]],
                     input2:[np.random.random((sl,) + input_shape).astype(np.float32) for sl in seq_lens[1]]})
Example #49
def test_cntk_cudnn():
    try:
        import tensorflow
        has_tensorflow = True
    except ImportError:
        has_tensorflow = False

    if has_tensorflow:
        tf_baseline_lstm()
    else:
        cntk_baseline_lstm()

    import cntk as C
    import cntk.contrib.crosstalk.crosstalk_cntk as crct
    ci = crct.instance

    input_var = C.sequence.input_variable(shape=(in_dim,))
    data = {input_var:data_cntk}
    ci.set_data(data)
    ci.set_workdir(workdir)

    W = C.parameter((-1,dim,), init=C.glorot_uniform())
    cudnn_fwbw = C.optimized_rnnstack(input_var, W, dim, 1, bidirectional=True, recurrent_op='lstm')
    ci.watch(cudnn_fwbw, 'cntk_birnn_cudnn', var_type=cstk.RnnAttr,
          attr=cstk.RnnAttr(bidirectional=True, op_type='lstm', input_dim=in_dim, hidden_dim=dim, forget_bias=0))
    ci.watch(cudnn_fwbw, 'cntk_birnn_cudnn_out')

    ci.assign('cntk_birnn_cudnn', load=True, load_name='birnn')
    assert ci.compare('cntk_birnn_cudnn_out', compare_name='birnn_out', rtol=1e-4, atol=1e-6)

    ci.fetch('cntk_birnn_cudnn', save=True)
    ci.assign('cntk_birnn_cudnn', load=True)
    assert ci.compare('cntk_birnn_cudnn_out', compare_name='birnn_out', rtol=1e-4, atol=1e-6)

    # test assign with value
    num_gates=4
    ci.assign('cntk_birnn_cudnn', value=cstk.RnnArgs(fw_W=np.random.random((in_dim,num_gates*dim)).astype(np.float32),
                                                     fw_H=np.random.random((dim,num_gates*dim)).astype(np.float32),
                                                     fw_b=np.random.random((num_gates*dim,)).astype(np.float32),
                                                     bw_W=np.random.random((in_dim,num_gates*dim)).astype(np.float32),
                                                     bw_H=np.random.random((dim,num_gates*dim)).astype(np.float32),
                                                     bw_b=np.random.random((num_gates*dim,)).astype(np.float32)))

    ci.reset()
Example #50
def test_saving_and_loading_int16_ndarray_as_attribute(tmpdir):
    model_file = str(tmpdir/'test_model_int16.bin')
    delete_if_file_exists(model_file)

    data = np.arange(0,64, dtype=np.int16).reshape(16,4)
    dict_val = C._to_cntk_dict_value(data)

    W = C.Parameter((C.InferredDimension, 42), init=C.glorot_uniform(), dtype=np.float64)
    x = C.input_variable(12, dtype=np.float64)
    y = C.times(x, W)
    y.custom_attributes = {'int16_nd':dict_val}
    y.save(model_file)

    assert(os.path.isfile(model_file))

    z = C.load_model(model_file)
    int16_data = z.custom_attributes['int16_nd']
    assert(int16_data.shape == (16,4))

    assert (np.array_equal(int16_data, data))
Example #51
def binary_convolution(filter_shape,
                       num_filters=1,
                       channels=1,
                       init=C.glorot_uniform(),
                       pad=False,
                       strides=1,
                       name='BinaryConvolution'):
    '''
    Creates a binary convolution function based on the input parameters.

    Args:
        filter_shape : shape of the filter
        num_filters  : number of filters to use
        channels     : number of input channels
        init         : initialization function for the filter
        pad          : padding enabled or not for the filter
        strides      : stride of the filter
        name         : name given to the binary convolution.

    Returns:
        a function for performing binary convolution
    '''

    kernel_shape = (num_filters, channels) + filter_shape
    W = C.Parameter(shape=kernel_shape, init=init, name="filter")

    def convolution(operand):
        bcv_operand_p = C.placeholder(
            operand.shape, operand.dynamic_axes, name="operand")

        # CustomMultibit quantizes its inputs to the given bit width (1 => binary)
        bcv = C.convolution(
                    CustomMultibit(W, 1),
                    CustomMultibit(bcv_operand_p, 1),
                    auto_padding=[False, pad, pad],
                    strides=[strides])

        return C.as_block(bcv, [(bcv_operand_p, operand)], name)

    return convolution
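A hypothetical usage sketch. CustomMultibit is not defined in this snippet; it is a native user-function for multi-bit quantization (CNTK's binary-convolution example library registers one), so this only runs once such an op is available:

# Sketch only: assumes a CustomMultibit user-function has been registered.
import cntk as C

x = C.input_variable((1, 28, 28))
conv = binary_convolution((3, 3), num_filters=32, channels=1, pad=True)
y = conv(x)
print(y.shape)  # (32, 28, 28): padding preserves the spatial dims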
Example #52
import numpy as np
import scipy.sparse
import cntk

# 2-dimensional inputs and 2 output classes, per the model comment below
input_dim = 2
num_classes = 2

np.random.seed(0)
def generate_synthetic_data(N):
    Y = np.random.randint(size=N, low=0, high=num_classes)  # labels
    X = (np.random.randn(N, input_dim)+3) * (Y[:,None]+1)   # data
    # Our model expects float32 features, and cross-entropy expects one-hot encoded labels.
    Y = scipy.sparse.csr_matrix((np.ones(N,np.float32), (range(N), Y)), shape=(N, num_classes))
    X = X.astype(np.float32)
    return X, Y
X_train, Y_train = generate_synthetic_data(20000)
X_test,  Y_test  = generate_synthetic_data(1024)

# Define the CNTK model function. The model function maps input data to
# predictions (here: 2-dimensional inputs --> 2 scores).
# This simple logistic-regression model just uses a linear transform.
data = cntk.input_variable(input_dim)
W = cntk.Parameter((input_dim, num_classes), init=cntk.glorot_uniform(), name='W')
b = cntk.Parameter((num_classes,), init=0, name='b')
model = cntk.times(data, W) + b

# Define the CNTK criterion function. A criterion function maps
# (input vectors, labels) to a loss function and an optional additional
# metric. The loss function is used to train the model parameters.
# We use cross entropy as a loss function.
label_one_hot = cntk.input_variable(num_classes, is_sparse=True)
loss   = cntk.cross_entropy_with_softmax(model, label_one_hot) # this applies softmax to model's output under the hood
metric = cntk.classification_error(model, label_one_hot)
criterion = cntk.combine([loss, metric]) # criterion is a tuple-valued function (loss, metric)

# Learner object. The learner implements the update algorithm, in this case plain SGD.
learning_rate = 0.1
learner = cntk.sgd(model.parameters, cntk.learning_parameter_schedule(learning_rate))
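The example stops after constructing the learner. A minimal training loop over the synthetic data defined above might look like this; the minibatch size is an arbitrary choice:

trainer = cntk.Trainer(model, criterion, [learner])

minibatch_size = 32
for i in range(0, len(X_train), minibatch_size):
    trainer.train_minibatch({data: X_train[i:i + minibatch_size],
                             label_one_hot: Y_train[i:i + minibatch_size]})

# average classification error over the held-out set
print(trainer.test_minibatch({data: X_test, label_one_hot: Y_test}))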
Example #53
def deconv_mnist(max_epochs=3):
    image_height = 28
    image_width  = 28
    num_channels = 1
    input_dim = image_height * image_width * num_channels
    num_output_classes = 10

    # Input variable and normalization
    input_var = cntk.ops.input_variable((num_channels, image_height, image_width), np.float32)
    scaled_input = cntk.ops.element_times(cntk.ops.constant(0.00390625), input_var)

    # Define the auto encoder model
    cMap = 1
    conv1   = cntk.layers.Convolution2D  ((5,5), cMap, pad=True, activation=cntk.ops.relu)(scaled_input)
    pool1   = cntk.layers.MaxPooling   ((4,4), (4,4))(conv1)
    unpool1 = cntk.layers.MaxUnpooling ((4,4), (4,4))(pool1, conv1)
    z       = cntk.layers.ConvolutionTranspose2D((5,5), num_channels, pad=True, bias=False, init=cntk.glorot_uniform(0.001))(unpool1)

    # define rmse loss function; f2 recomputes the scaled input, so this is
    # equivalent to 'err = cntk.ops.minus(z, scaled_input)'
    f2        = cntk.ops.element_times(cntk.ops.constant(0.00390625), input_var)
    err       = cntk.ops.reshape(cntk.ops.minus(z, f2), (784))
    sq_err    = cntk.ops.element_times(err, err)
    mse       = cntk.ops.reduce_mean(sq_err)
    rmse_loss = cntk.ops.sqrt(mse)
    rmse_eval = cntk.ops.sqrt(mse)

    reader_train = create_reader(os.path.join(data_path, 'Train-28x28_cntk_text.txt'), True, input_dim, num_output_classes)

    # training config
    epoch_size = 60000
    minibatch_size = 64

    # Set learning parameters
    lr_schedule = cntk.learning_rate_schedule([0.00015], cntk.learner.UnitType.sample, epoch_size)
    mm_schedule = cntk.learner.momentum_as_time_constant_schedule([600], epoch_size)

    # Instantiate the trainer object to drive the model training
    learner = cntk.learner.momentum_sgd(z.parameters, lr_schedule, mm_schedule, unit_gain=True)
    progress_printer = cntk.utils.ProgressPrinter(tag='Training')
    trainer = cntk.Trainer(z, (rmse_loss, rmse_eval), learner, progress_printer)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var : reader_train.streams.features
    }

    cntk.utils.log_number_of_parameters(z) ; print()

    # Get minibatches of images to train with and perform model training
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch.
            trainer.train_minibatch(data)                                   # update model with it
            sample_count += data[input_var].num_samples                     # count samples processed so far

        trainer.summarize_training_progress()
        z.save(os.path.join(model_path, "07_Deconvolution_PY_{}.model".format(epoch)))

    # rename final model
    last_model_name = os.path.join(model_path, "07_Deconvolution_PY_{}.model".format(max_epochs - 1))
    final_model_name = os.path.join(model_path, "07_Deconvolution_PY.model")
    try:
        os.remove(final_model_name)
    except OSError:
        pass
    os.rename(last_model_name, final_model_name)
    
    # Load test data
    reader_test = create_reader(os.path.join(data_path, 'Test-28x28_cntk_text.txt'), False, input_dim, num_output_classes)

    input_map = {
        input_var : reader_test.streams.features
    }

    # Test data for trained model
    epoch_size = 10000
    minibatch_size = 1024

    # process minibatches and evaluate the model
    metric_numer    = 0
    metric_denom    = 0
    sample_count    = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)

        # Fetch next test minibatch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)

        # Evaluate the model on the minibatch and accumulate the error metric.
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch

        # Keep track of the number of samples processed so far.
        sample_count += data[input_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

    return metric_numer/metric_denom
Example #54
def embed(self):
    # load glove
    npglove = np.zeros((self.wg_dim, self.w2v_hidden_dim), dtype=np.float32)
    with open(os.path.join(self.abs_path, 'glove.6B.100d.txt'), encoding='utf-8') as f:
        for line in f:
            parts = line.split()
            word = parts[0].lower()
            if word in self.vocab:
                npglove[self.vocab[word],:] = np.asarray([float(p) for p in parts[1:]])
    glove = C.constant(npglove)
    nonglove = C.parameter(shape=(len(self.vocab) - self.wg_dim, self.w2v_hidden_dim), init=C.glorot_uniform(), name='TrainableE')

    def func(wg, wn):
        return C.times(wg, glove) + C.times(wn, nonglove)
    return func
def simple_mnist():
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 2
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    feature = C.input_variable(input_dim)
    label = C.input_variable(num_output_classes)

    # Instantiate the feedforward classification model
    scaled_input = element_times(constant(0.00390625), feature)

    # z = Sequential([
    #     Dense(hidden_layers_dim, activation=relu),
    #     Dense(hidden_layers_dim, activation=relu),
    #     Dense(num_output_classes)])(scaled_input)

    with default_options(activation=relu, init=C.glorot_uniform()):
        z = Sequential([For(range(num_hidden_layers),
            lambda i: Dense(hidden_layers_dim)),
            Dense(num_output_classes, activation=None)])(scaled_input)

    ce = cross_entropy_with_softmax(z, label)
    pe = classification_error(z, label)

    # setup the data
    path = os.path.join(abs_path, "Train-28x28_cntk_text.txt")

    reader_train = MinibatchSource(CTFDeserializer(path, StreamDefs(
        features=StreamDef(field='features', shape=input_dim),
        labels=StreamDef(field='labels', shape=num_output_classes))))

    input_map = {
        feature: reader_train.streams.features,
        label: reader_train.streams.labels
    }

    # Training config
    minibatch_size = 64
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 10

    # Instantiate progress writers.
    progress_writers = [ProgressPrinter(
        tag='Training',
        num_epochs=num_sweeps_to_train_with)]

    # Instantiate the trainer object to drive the model training
    lr = learning_rate_schedule(1, UnitType.sample)
    trainer = Trainer(z, (ce, pe), [adadelta(z.parameters, lr)], progress_writers)

    training_session(
        trainer=trainer,
        mb_source=reader_train,
        mb_size=minibatch_size,
        model_inputs_to_streams=input_map,
        max_samples=num_samples_per_sweep * num_sweeps_to_train_with,
        progress_frequency=num_samples_per_sweep
    ).train()

    # Load test data
    path = os.path.join(abs_path, "Test-28x28_cntk_text.txt")

    reader_test = MinibatchSource(CTFDeserializer(path, StreamDefs(
        features=StreamDef(field='features', shape=input_dim),
        labels=StreamDef(field='labels', shape=num_output_classes))))

    input_map = {
        feature: reader_test.streams.features,
        label: reader_test.streams.labels
    }

    # Test data for trained model
    test_minibatch_size = 1024
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size
    test_result = 0.0
    for i in range(0, int(num_minibatches_to_test)):
        mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map)
        eval_error = trainer.test_minibatch(mb)
        test_result = test_result + eval_error

    # Average of evaluation errors of all test minibatches
    return test_result / num_minibatches_to_test
Example #57
def charcnn(self, x):
    conv_out = C.layers.Sequential([
        C.layers.Embedding(self.char_emb_dim),
        C.layers.Dropout(self.dropout),
        C.layers.Convolution2D((5,self.char_emb_dim), self.convs, activation=C.relu, init=C.glorot_uniform(), bias=True, init_bias=0, name='charcnn_conv')])(x)
    return C.reduce_max(conv_out, axis=1)  # workaround for a cuDNN failure in GlobalMaxPooling