Example #1
    def build_model(self):
        phmap = self.get_inputs()
        df = phmap['df']
        qf = phmap['qf']
        ab = phmap['ab']
        ae = phmap['ae']
        #input layer
        cc = C.reshape(phmap['cc'], (1, -1))
        qc = C.reshape(phmap['qc'], (1, -1))
        c_processed, q_processed = self.input_layer(phmap['cgw'],phmap['cnw'],cc,\
            phmap['qgw'],phmap['qnw'],qc).outputs
        c_processed = C.splice(c_processed, df)
        q_processed = C.splice(q_processed, qf)
        # attention layer output:[#,c][8*hidden_dim]
        att_context, wei1 = self.attention_layer(c_processed,
                                                 q_processed,
                                                 dimc=2 * self.hidden_dim + 3,
                                                 dimq=2 * self.hidden_dim + 1,
                                                 common_dim=2 * self.hidden_dim).outputs
        # modeling layer output:[#][1] [#,c][2*hidden_dim]
        mod_context_reg = self.modeling_layer(att_context)
        # output layer
        mod_context_reg = C.splice(mod_context_reg, df)
        start_logits, end_logits = self.output_layer(att_context,
                                                     mod_context_reg).outputs

        # loss
        new_loss = all_spans_loss(start_logits, ab, end_logits, ae)
        res = C.combine([start_logits, end_logits])
        self._model = res
        self._loss = new_loss
        return self._model, self._loss, self._input_phs
Example #2
    def simi_attention(self, input, memory):
        '''
        Returns:
            memory-weighted vectors over the input [#,c][d]
            attention weights
        '''
        input_ph = C.placeholder()  # [#,c][d]
        mem_ph = C.placeholder()  # [#,q][d]

        input_dense = Dense(2 * self.hidden_dim, bias=False, input_rank=1)
        mem_dense = Dense(2 * self.hidden_dim, bias=False, input_rank=1)
        bias = C.Parameter(shape=(2 * self.hidden_dim, ), init=0.0)
        weight_dense = Dense(1, bias=False, input_rank=1)

        proj_inp = input_dense(input_ph)  # [#,c][d]
        proj_mem = mem_dense(mem_ph)  # [#,q][d]
        unpack_memory, mem_mask = C.sequence.unpack(
            proj_mem, 0).outputs  # [#][*=q, d] [#][*=q]
        expand_mem = C.sequence.broadcast_as(unpack_memory,
                                             proj_inp)  # [#,c][*=q,d]
        expand_mask = C.sequence.broadcast_as(mem_mask, proj_inp)  # [#,c][*=q]
        matrix = C.reshape(weight_dense(C.tanh(proj_inp + expand_mem + bias)),
                           (-1, ))  # [#,c][*=q]
        matrix = C.element_select(expand_mask, matrix, -1e30)
        logits = C.softmax(matrix, axis=0)  # [#,c][*=q]
        weight_mem = C.reduce_sum(C.reshape(logits, (-1, 1)) * expand_mem,
                                  axis=0)  # [#,c][d]
        weight_mem = C.reshape(weight_mem, (-1, ))

        return C.as_block(C.combine(weight_mem, logits), [(input_ph, input),
                                                          (mem_ph, memory)],
                          'simi_attention', 'simi_attention')
Example #3
def UpSampling(x):
    xr = C.reshape(x, (x.shape[0], x.shape[1], 1, x.shape[2], 1))
    xx = C.splice(xr, xr, axis=-1)
    xy = C.splice(xx, xx, axis=-3)
    r = C.reshape(xy, (x.shape[0], x.shape[1] * 2, x.shape[2] * 2))

    return r
Example #4
def UpSampling2D(x):
    xr = C.reshape(x, (x.shape[0], x.shape[1], 1, x.shape[2], 1))
    xx = C.splice(xr, xr, axis=-1)  # axis=-1 refers to the last axis
    xy = C.splice(xx, xx, axis=-3)  # axis=-3 refers to the middle axis
    r = C.reshape(xy, (x.shape[0], x.shape[1] * 2, x.shape[2] * 2))

    return r
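The two UpSampling examples above implement 2x nearest-neighbor upsampling by duplicating a singleton axis per spatial dimension and reshaping. A minimal shape check for the function above, assuming CNTK and NumPy are installed and using a made-up (3, 4, 4) input:

import numpy as np
import cntk as C

x = C.input_variable((3, 4, 4))
up = UpSampling2D(x)
assert up.shape == (3, 8, 8)
# every input pixel lands in a 2x2 block of the output
data = np.arange(3 * 4 * 4, dtype=np.float32).reshape(1, 3, 4, 4)
out = up.eval({x: data})
assert np.array_equal(out[0][:, ::2, ::2], data[0])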
Example #5
    def createDecoderNetwork(self, networkHiddenSrc, srcLength, trgLength):
        timeZeroHidden = C.slice(networkHiddenSrc, 0, 0, 1)
        srcSentEmb = C.slice(timeZeroHidden, -1, Config.SrcHiddenSize,
                             Config.SrcHiddenSize * 2)
        networkHiddenTrg = {}
        inputTrg = C.reshape(self.inputMatrixTrg,
                             shape=(Config.TrgMaxLength, Config.BatchSize,
                                    Config.TrgVocabSize))
        attProbAll = []
        tce = 0
        for i in range(0, trgLength, 1):

            preTrgEmb = self.initTrgEmb if i == 0 else self.EmbTrg(inputTrg[i - 1])

            if (i == 0):
                networkHiddenTrg[i] = self.createDecoderInitNetwork(srcSentEmb)
            else:
                (networkHiddenTrg[i], attProb) = self.createDecoderRNNNetwork(
                    networkHiddenSrc, preTrgEmb, networkHiddenTrg[i - 1],
                    srcLength)
                attProbAll = attProb if i == 1 else C.splice(
                    attProbAll, attProb, axis=0)

            preSoftmax = self.createReadOutNetwork(networkHiddenTrg[i],
                                                   preTrgEmb)
            ce = C.cross_entropy_with_softmax(preSoftmax, inputTrg[i], 2)
            ce = C.reshape(ce, shape=(1, Config.BatchSize))
            tce += C.times_transpose(ce, self.maskMatrixTrg[i])

        return tce
Example #6
def test_depth_to_space(image_shape, num_channels, block_size, device_id,
                        precision):
    dev = cntk_device(device_id)
    from cntk.internal import sanitize_dtype_cntk

    input_val = np.array(np.reshape(range(num_channels), (num_channels, 1, 1)),
                         dtype=PRECISION_TO_TYPE[precision])
    input_val = np.tile(input_val, (1, ) + image_shape)
    img = C.input_variable(
        (num_channels, ) + image_shape,
        dtype=sanitize_dtype_cntk(PRECISION_TO_TYPE[precision]))

    # Result from depth_to_space node.
    depth_to_space_op = C.depth_to_space(img, block_size)
    output_test = depth_to_space_op.eval({img: input_val})

    # Reference result from simulating depth_to_space with other CNTK ops.
    h, w = image_shape
    reshape_node = C.reshape(img, (block_size, block_size, num_channels //
                                   (block_size**2), h, w))
    transpose_node = C.transpose(reshape_node, [2, 3, 0, 4, 1])
    depth_to_space_sim_op = C.reshape(
        transpose_node,
        (num_channels // (block_size**2), h * block_size, w * block_size))
    output_ref = depth_to_space_sim_op.eval({img: input_val})

    assert np.array_equal(output_test, output_ref)
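For intuition, the reference simulation above (reshape, transpose, reshape) can be replayed in plain NumPy; this is a hedged sketch with made-up sizes (8 channels, block_size 2), not part of the test:

import numpy as np

c, h, w, b = 8, 2, 2, 2
x = np.arange(c * h * w).reshape(c, h, w)
# same permutation as C.transpose(reshape_node, [2, 3, 0, 4, 1])
y = x.reshape(b, b, c // b**2, h, w).transpose(2, 3, 0, 4, 1).reshape(c // b**2, h * b, w * b)
assert y.shape == (2, 4, 4)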
Example #7
def create_word2vec_cbow_model(word_one_hot, context_one_hots, negative_one_hots):
	# shared_embedding_layer = Embedding(G.embedding_dimension, uniform(scale=1.0/2.0/G.embedding_dimension))
	shared_embedding_layer = Embedding(G.embedding_dimension)

	word_embedding = shared_embedding_layer(word_one_hot)
	context_embeddings = [shared_embedding_layer(x) for x in context_one_hots]
	negative_embeddings = [shared_embedding_layer(x) for x in negative_one_hots]

	print(word_embedding.shape)
	word_embedding_reshaped = C.reshape(word_embedding, shape=(1, G.embedding_dimension))
	print(word_embedding_reshaped.shape)

	context_embeddings_all = C.reshape(C.splice(*context_embeddings), shape=(context_size, G.embedding_dimension))
	negative_embeddings_all = C.reshape(C.splice(*negative_embeddings), shape=(G.negative, G.embedding_dimension))
	print(context_embeddings_all.shape)
	print(negative_embeddings_all.shape)
	cbow = C.reshape(C.reduce_mean(context_embeddings_all, 0), shape=(G.embedding_dimension))
	print(cbow.shape)

	# word_context_product = C.times_transpose(word_embedding_reshaped, cbow)
	word_context_product = C.times_transpose(word_embedding, cbow)
	print(word_context_product.shape)
	negative_context_product = C.reshape(C.times_transpose(negative_embeddings_all, cbow), shape=(G.negative))
	print(negative_context_product.shape)

	word_negative_context_product = C.splice(word_context_product, negative_context_product)
	print(word_negative_context_product.shape)
	# return model and shared embedding layer
	return word_negative_context_product, shared_embedding_layer
Example #8
 def multiFunc(self, arg1):
     # load or create the inputs we need
     multiIn = C.input(shape=arg1.shape, dynamic_axes = arg1.dynamic_axes)
     bit_map = C.constant(self.bit_map)
     max_bits = self.bit_map.max()
     shape = multiIn.shape
     reformed = C.reshape(multiIn, (-1,))
     # let's compute the means we need
     # carry_over represents the remaining value that still needs to be binarized. For a single bit,
     # this is just the input. For more bits, it is the difference between the previous bits' approximation and the true value.
     carry_over = multiIn
     approx = C.element_times(multiIn, 0)
     # iterate through the maximum number of bits specified by the bit map, computing each level of binarization
     for i in range(max_bits):
         # determine which values of the input should be binarized to i bits or more
         hot_vals = C.greater(bit_map, i)
         # select only the values which we need to binarize
         valid_vals = C.element_select(hot_vals, carry_over, 0)
         # compute mean on a per kernel basis, reshaping is done to allow for sum reduction along only axis 0 (the kernels)
         mean = C.element_divide(C.reduce_sum(C.reshape(C.abs(valid_vals), (valid_vals.shape[0], -1)), axis=1), C.reduce_sum(C.reshape(hot_vals, (hot_vals.shape[0], -1)), axis=1))
         # reshape the mean to match the dimensionality of the input
         mean = C.reshape(mean, (mean.shape[0], mean.shape[1], 1, 1))
         # binarize the carry over
         bits = C.greater(carry_over, 0)
         bits = C.element_select(bits, bits, -1)
         bits = C.element_select(hot_vals, bits, 0)
         # add in the equivalent binary representation to the approximation
         approx = C.plus(approx, C.element_times(mean, bits))
         # compute the new carry over
         carry_over = C.plus(C.element_times(C.element_times(-1, bits), mean), carry_over)
         
     return approx, multiIn
Example #9
 def new_attention(encoder_hidden_state, decoder_hidden_state):
     # encoder_hidden_state: [#, e] [h]
     # decoder_hidden_state: [#, d] [H]
     unpacked_encoder_hidden_state, valid_mask = C.sequence.unpack(encoder_hidden_state, padding_value=0).outputs
     # unpacked_encoder_hidden_state: [#] [*=e, h]
     # valid_mask: [#] [*=e]
     projected_encoder_hidden_state = C.sequence.broadcast_as(attn_proj_enc(unpacked_encoder_hidden_state), decoder_hidden_state)
     # projected_encoder_hidden_state: [#, d] [*=e, attention_dim]
     broadcast_valid_mask = C.sequence.broadcast_as(C.reshape(valid_mask, (1,), 1), decoder_hidden_state)
     # broadcast_valid_mask: [#, d] [*=e]
     projected_decoder_hidden_state = attn_proj_dec(decoder_hidden_state)
     # projected_decoder_hidden_state: [#, d] [attention_dim]
     tanh_output = C.tanh(projected_decoder_hidden_state + projected_encoder_hidden_state)
     # tanh_output: [#, d] [*=e, attention_dim]
     attention_logits = attn_proj_tanh(tanh_output)
     # attention_logits = [#, d] [*=e, 1]
     minus_inf = C.constant(-1e+30)
     masked_attention_logits = C.element_select(broadcast_valid_mask, attention_logits, minus_inf)
     # masked_attention_logits = [#, d] [*=e]
     attention_weights = C.softmax(masked_attention_logits, axis=0)
     attention_weights = Label('attention_weights')(attention_weights)
     # attention_weights = [#, d] [*=e]
     attended_encoder_hidden_state = C.reduce_sum(attention_weights * C.sequence.broadcast_as(unpacked_encoder_hidden_state, attention_weights), axis=0)
     # attended_encoder_hidden_state = [#, d] [1, h]
     output = attn_final_stab(C.reshape(attended_encoder_hidden_state, (), 0, 1))
     # output = [#, d], [h]
     return output
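The closure above takes its projection layers from the enclosing scope. A hedged sketch of how they might be declared; attn_proj_enc, attn_proj_dec, attn_proj_tanh, attn_final_stab, and attention_dim are assumptions, not taken from the original:

import cntk as C
from cntk.layers import Dense, Label, Stabilizer  # Label is used inside new_attention itself

attention_dim = 128                                # assumed size
attn_proj_enc = Dense(attention_dim, bias=False)   # projects unpacked encoder states
attn_proj_dec = Dense(attention_dim, bias=False)   # projects the decoder state
attn_proj_tanh = Dense(1)                          # scores each encoder position
attn_final_stab = Stabilizer()                     # stabilizes the attended output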
Example #10
    def criterion(self):

        # hyperparameters
        lambda_val = 0.5

        # Margin loss
        left = ct.square(ct.relu(0.9 - self.length))
        right = ct.square(ct.relu(self.length - 0.1))
        left = ct.reshape(left, (-1))
        right = ct.reshape(right, (-1))
        lc = self.labels * left + lambda_val * (1 - self.labels) * right

        margin_loss = ct.reduce_sum(lc, axis=0)
        margin_loss = ct.reduce_mean(margin_loss, axis=ct.axis.Axis.default_batch_axis())

        # classification_error
        predict = ct.softmax(self.length, axis=0)
        error = ct.classification_error(ct.reshape(predict, (10)), self.labels)

        total_loss = margin_loss
        reconstruction_err = 0

        if self.use_reconstruction:
            features = ct.reshape(self.features, shape=(-1,))
            encoder = ct.reshape(self.training_model, shape=(-1,))
            squared = ct.square(encoder - features)
            reconstruction_err = ct.reduce_mean(squared, axis=0)
            reconstruction_err = ct.reduce_mean(reconstruction_err, axis=ct.axis.Axis.default_batch_axis())
            total_loss = margin_loss + (0.0005*784) * reconstruction_err

        return total_loss, error
Example #11
def test_reshape_free_static_axis():
    x = C.input((C.FreeDimension, 2, 3))
    x_reshaped = C.reshape(x, (-1), 0, 2)
    assert x_reshaped.shape == (C.FreeDimension, 3)
    x_data = np.arange(12).reshape(2, 2, 3)
    result = x_reshaped.eval({x: x_data})
    assert np.array_equal(result[0], x_data.reshape(4, 3))

    x_data = np.arange(18).reshape(3, 2, 3)
    result = x_reshaped.eval({x: x_data})
    assert np.array_equal(result[0], x_data.reshape(6, 3))

    x_reshaped = C.reshape(x, (-1), 1, 3)
    assert x_reshaped.shape == (C.FreeDimension, 6)
    x_data = np.arange(12).reshape(2, 2, 3)
    result = x_reshaped.eval({x: x_data})
    assert np.array_equal(result[0], x_data.reshape(2, 6))

    x_reshaped = C.reshape(x, (4), 0, 2)
    assert x_reshaped.shape == (4, 3)
    x_data = np.arange(12).reshape(2, 2, 3)
    result = x_reshaped.eval({x: x_data})
    assert np.array_equal(result[0], x_data.reshape(4, 3))

    x_data = np.arange(6).reshape(1, 2, 3)
    with pytest.raises(ValueError):
        result = x_reshaped.eval({x: x_data})
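As the test above exercises, C.reshape(x, replacement_shape, begin_axis, end_axis) replaces only the axes in [begin_axis, end_axis) and leaves the rest untouched; a minimal standalone sketch of that contract:

import numpy as np
import cntk as C

x = C.input_variable((2, 2, 3))
y = C.reshape(x, (-1,), 1, 3)  # collapse axes 1..2 into one axis of size 6
assert y.shape == (2, 6)
data = np.arange(12, dtype=np.float32).reshape(1, 2, 2, 3)  # batch of one
assert np.array_equal(y.eval({x: data})[0], data[0].reshape(2, 6))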
Example #12
def test_reshape_free_static_axis():
    x = C.input_variable((C.FreeDimension, 2, 3))
    x_reshaped = C.reshape(x, (-1), 0, 2)
    assert x_reshaped.shape == (C.FreeDimension, 3)
    x_data = np.arange(12).reshape(2, 2, 3)
    result = x_reshaped.eval({x : x_data})
    assert np.array_equal(result[0], x_data.reshape(4, 3))

    x_data = np.arange(18).reshape(3, 2, 3)
    result = x_reshaped.eval({x : x_data})
    assert np.array_equal(result[0], x_data.reshape(6, 3))

    x_reshaped = C.reshape(x, (-1), 1, 3)
    assert x_reshaped.shape == (C.FreeDimension, 6)
    x_data = np.arange(12).reshape(2, 2, 3)
    result = x_reshaped.eval({x : x_data})
    assert np.array_equal(result[0], x_data.reshape(2, 6))

    x_reshaped = C.reshape(x, (4), 0, 2)
    assert x_reshaped.shape == (4, 3)
    x_data = np.arange(12).reshape(2, 2, 3)
    result = x_reshaped.eval({x : x_data})
    assert np.array_equal(result[0], x_data.reshape(4, 3))

    x_data = np.arange(6).reshape(1, 2, 3)
    with pytest.raises(ValueError):
        result = x_reshaped.eval({x : x_data})
Example #13
 def instance_normalization(x):
     mean = C.reduce_mean(x, axis=(1, 2))
     x0 = x - mean
     std = C.sqrt(C.reduce_mean(x0 * x0, axis=(1, 2)))
     if epsilon != 0:
         std += epsilon
     x_hat = x0 / std
     return x_hat * C.reshape(scale, (-1, 1, 1)) + C.reshape(bias, (-1, 1, 1))
Example #14
 def __local_response_normalization(self, k, n, alpha, beta, name=''):
     x = cntk.placeholder(name='lrn_arg')
     x2 = cntk.square(x)
     x2s = cntk.reshape(x2, (1, cntk.InferredDimension), 0, 1)
     W = cntk.constant(alpha / (2 * n + 1), (1, 2 * n + 1, 1, 1), name='W')
     y = cntk.convolution(W, x2s)
     b = cntk.reshape(y, cntk.InferredDimension, 0, 2)
     den = cntk.exp(beta * cntk.log(k + b))
     apply_x = cntk.element_divide(x, den)
     return apply_x
Example #15
 def lrn(x, depth_radius, bias, alpha, beta, name=''):
     x2 = C.square(x)
     # reshape to insert a fake singleton reduction dimension after the 3rd axis (the channel axis). Note that Python and BrainScript axis orders are reversed.
     x2s = C.reshape(x2, (1, C.InferredDimension), 0, 1)
     W = C.constant(alpha/(2*depth_radius+1), shape=(1,2*depth_radius+1,1,1), dtype=dtype, name='W')
     # 3D convolution with a filter that has a non-1 size only in the 3rd axis; it does not reduce, since the reduction dimension is fake and of size 1
     y = C.convolution(W, x2s)
     # reshape back to remove the fake singleton reduction dimension
     b = C.reshape(y, C.InferredDimension, 0, 2)
     den = C.exp(beta * C.log(bias + b))
     return C.element_divide(x, den)
Example #16
File: NIN_test2.py Project: lizishu/CNTK
def LocalResponseNormalization(k, n, alpha, beta, name=''):
    x = C.placeholder(name='lrn_arg')
    x2 = C.square(x)
    x2s = C.reshape(x2, (1, C.InferredDimension), 0, 1)
    W = C.constant(alpha / (2 * n + 1), (1, 2 * n + 1, 1, 1), name='W')
    y = C.convolution(W, x2s)
    b = C.reshape(y, C.InferredDimension, 0, 2)
    den = C.exp(beta * C.log(k + b))
    apply_x = C.element_divide(x, den)

    return apply_x
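Because the block above is built on a placeholder, it can be applied by calling it on a real input, which substitutes the placeholder. A hedged usage sketch with made-up hyperparameters and feature-map size:

import cntk as C

feature_map = C.input_variable((32, 15, 15))  # (channels, height, width), sizes assumed
lrn = LocalResponseNormalization(k=1.0, n=2, alpha=1e-4, beta=0.75)
y = lrn(feature_map)
assert y.shape == feature_map.shape  # LRN is shape-preserving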
Example #17
File: __init__.py Project: ssokhey/CNTK
def hierarchical_softmax_layer(input_var, label_index, label_dim, label_classes=None):
    '''
    A two-layer hierarchical softmax function:

    Args:
        input_var: Variable with shape: [#,*](dim_x)
        label_index: index of the label's category: [#,*](1)
        label_dim: number of label categories
        label_classes: number of classes of the label categories
    Returns:
        output_prob: the probability of the given label [#,*](1)
        class_probs: the probabilities of all the label classes [#,*](label_classes)
        all_probs: the probabilities of all labels
    '''
    input_dim = input_var.shape[0]

    if not label_classes:
        label_classes = int(np.ceil(np.sqrt(float(label_dim))))

    n_outputs_per_class = int(np.ceil(label_dim / label_classes))

    target_class = C.floor((label_index + 0.5) / n_outputs_per_class)
    target_output_in_class = C.round(label_index - target_class * n_outputs_per_class)

    w1 = parameter(shape=(input_dim, label_classes), init=C.glorot_normal(), name='hsoftmax_w1')
    b1 = parameter(shape=(label_classes), init=C.glorot_normal(), name='hsoftmax_b1')
    w2s = parameter(shape=(label_classes, input_dim, n_outputs_per_class,), init=C.glorot_normal(), name='hsoftmax_w2s')
    b2s = parameter(shape=(label_classes, n_outputs_per_class,), init=C.glorot_normal(), name='hsoftmax_b2s')

    class_probs = softmax(b1 + times(input_var, w1))

    # TODO: fix the bug in backprop for sparse, and use sparse embedding to accelerate
    target_class_one_hot = C.one_hot(target_class, num_classes=label_classes, sparse_output=False)
    w2 = C.reshape(C.times(target_class_one_hot, w2s, output_rank=2), [input_dim, -1])
    b2 = C.reshape(times(target_class_one_hot, b2s, output_rank=1), [-1])
    probs_in_class = softmax(b2 + times(input_var, w2))

    prob_in_class = C.times_transpose(C.one_hot(target_output_in_class, num_classes=n_outputs_per_class, sparse_output=False), probs_in_class)
    class_prob = C.times_transpose(C.one_hot(target_class, num_classes=label_classes, sparse_output=False), class_probs)
    output_prob = prob_in_class * class_prob

    # this is for calculating all the outputs' probabilities
    all_probs = []
    for i in range(label_classes):
        ci = C.constant(i)
        ci_one_hot = C.one_hot(ci, num_classes=label_classes, sparse_output=False)
        w2a = C.times(ci_one_hot, w2s, output_rank=2)
        b2a = C.times(ci_one_hot, b2s, output_rank=1)
        probs_in_classa = C.softmax(b2a + times(input_var, w2a))
        class_proba = C.times_transpose(ci_one_hot, class_probs)
        output_proba = probs_in_classa * class_proba
        all_probs.append(output_proba)

    return output_prob, class_probs, all_probs
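A hedged usage sketch for the hierarchical softmax above; the dimensions are made up, and the unqualified parameter, softmax, and times calls in the excerpt are assumed to come from cntk's top-level namespace (the excerpt also assumes numpy as np at module level):

import numpy as np
import cntk as C
from cntk import parameter, softmax, times  # used unqualified inside the excerpt

input_var = C.sequence.input_variable(20)   # [#,*](20)
label_index = C.sequence.input_variable(1)  # [#,*](1)
output_prob, class_probs, all_probs = hierarchical_softmax_layer(
    input_var, label_index, label_dim=100)  # label_classes defaults to ceil(sqrt(100)) = 10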
Example #18
 def lrn(x, depth_radius, bias, alpha, beta, name=''):
     x2 = C.square(x)
     # reshape to insert a fake singleton reduction dimension after the 3rd axis (the channel axis). Note that Python and BrainScript axis orders are reversed.
     x2s = C.reshape(x2, (1, C.InferredDimension), 0, 1)
     W = C.constant(alpha/(2*depth_radius+1), shape=(1,2*depth_radius+1,1,1), dtype=dtype, name='W')
     # 3D convolution with a filter that has a non-1 size only in the 3rd axis; it does not reduce, since the reduction dimension is fake and of size 1
     y = C.convolution(W, x2s)
     # reshape back to remove the fake singleton reduction dimension
     b = C.reshape(y, C.InferredDimension, 0, 2)
     den = C.exp(beta * C.log(bias + b))
     return C.element_divide(x, den)
Example #19
    def masking(input, labels):

        if not is_onehot_encoded:
            mask = ct.reshape(ct.one_hot(
                ct.reshape(ct.argmax(labels, axis=0), shape=(-1, )), 10),
                              shape=(10, 1, 1))
            mask = ct.stop_gradient(mask)
        else:
            mask = ct.reshape(labels, shape=(10, 1, 1))

        mask = ct.splice(*([mask] * 16), axis=1)
        return ct.reshape(ct.element_times(input, mask), shape=(-1, ))
Example #20
    def attention_layer(self, context, query, layer):

        q_processed = C.placeholder(shape=(2*self.hidden_dim,))
        p_processed = C.placeholder(shape=(2*self.hidden_dim,))

        qvw, qvw_mask = C.sequence.unpack(q_processed, padding_value=0).outputs

        wq = C.parameter(shape=(2*self.hidden_dim, 2*self.hidden_dim), init=C.glorot_uniform())
        wp = C.parameter(shape=(2*self.hidden_dim, 2*self.hidden_dim), init=C.glorot_uniform())
        wg = C.parameter(shape=(8*self.hidden_dim, 8*self.hidden_dim), init=C.glorot_uniform())
        v = C.parameter(shape=(2*self.hidden_dim, 1), init=C.glorot_uniform())

        # seq[tensor[2d]] p_len x 2d
        wpt = C.reshape(C.times(p_processed, wp), (-1, 2*self.hidden_dim))

        # q_len x 2d
        wqt = C.reshape(C.times(qvw, wq), (-1, 2*self.hidden_dim))
        
        # seq[tensor[q_len]]
        S = C.reshape(C.times(C.tanh(C.sequence.broadcast_as(wqt, p_processed) + wpt), v), (-1))

        qvw_mask_expanded = C.sequence.broadcast_as(qvw_mask, p_processed)

        # seq[tensor[q_len]]
        S = C.element_select(qvw_mask_expanded, S, C.constant(-1e+30))
        
        # seq[tensor[q_len]]
        A = C.softmax(S, axis=0)

        # seq[tensor[2d]]
        swap_qvw = C.swapaxes(qvw)
        cq = C.reshape(C.reduce_sum(A * C.sequence.broadcast_as(swap_qvw, A), axis=1), (-1))

        # seq[tensor[4d]]
        uc_concat = C.splice(p_processed, cq, p_processed * cq, cq * cq)
        
        # seq[tensor[4d]]
        gt = C.tanh(C.times(uc_concat, wg))
        
        # seq[tensor[4d]]
        uc_concat_star = gt * uc_concat
 
        # seq[tensor[4d]]
        vp = C.layers.Sequential([
            C.layers.Dropout(self.dropout),
            OptimizedRnnStack(self.hidden_dim, bidirectional=True, 
                use_cudnn=self.use_cudnn, name=layer+'_attention_rnn')])(uc_concat_star)
        
        return C.as_block(
            vp,
            [(p_processed, context), (q_processed, query)],
            'attention_layer',
            'attention_layer')
Example #21
def LocalResponseNormalization(k, n, alpha, beta, name=''):
    x = C.placeholder(name='lrn_arg')
    x2 = C.square(x)
    # reshape to insert a fake singleton reduction dimension after the 3rd axis (the channel axis). Note that Python and BrainScript axis orders are reversed.
    x2s = C.reshape(x2, (1, C.InferredDimension), 0, 1)
    W = C.constant(alpha/(2*n+1), (1,2*n+1,1,1), name='W')
    # 3D convolution with a filter that has a non-1 size only in the 3rd axis; it does not reduce, since the reduction dimension is fake and of size 1
    y = C.convolution(W, x2s)
    # reshape back to remove the fake singleton reduction dimension
    b = C.reshape(y, C.InferredDimension, 0, 2)
    den = C.exp(beta * C.log(k + b))
    apply_x = C.element_divide(x, den)
    return apply_x
Example #22
 def cnwindow(mna, window):
     mnas = mna.shape
     mnout = (*mnas[:-2], *window, (mnas[-2] - window[-2]) + 1, (mnas[-1] - window[-1]) + 1)
     mne2 = None
     for R in range(window[0]):
         j_lim = R + mnout[-2]
         for H in range(window[1]):
             tdata = C.slice(mna, [-2, -1], [R, H], [j_lim, H + mnout[-1]])
             mne2 = tdata if mne2 is None else C.splice(mne2, tdata, axis=1)
     # reshape into the window layout, reorder the axes, then reshape to the output layout
     out = C.transpose(C.reshape(mne2, shape=mnout), (0, 5, 4, 3, 2, 1))
     return C.reshape(out, (mnout[0], *mnout[5:3:-1], 1, *mnout[3:0:-1]))
Example #23
def cgan_discriminator(x, y):
    with C.layers.default_options(init=C.normal(scale=0.02), map_rank=1, use_cntk_engine=True):
        hx = C.reshape(x, (1, 28, 28))
        hy = C.ones_like(hx) * C.reshape(y, (label_dim, 1, 1))
        h = C.splice(hx, hy, axis=0)

        h = C.leaky_relu((Convolution2D((5, 5), 1, strides=(2, 2))(h)), alpha=0.2)
        h = C.leaky_relu(BatchNormalization()(Convolution2D((5, 5), 64, strides=(2, 2))(h)), alpha=0.2)
        h = C.leaky_relu(BatchNormalization()(Dense(1024)(h)), alpha=0.2)

        h = Dense(1, activation=C.sigmoid)(h)

    return h
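A hedged shape check for the conditional discriminator above; it assumes the surrounding script defines label_dim, imports Convolution2D, BatchNormalization, and Dense from cntk.layers, and that the excerpt's C.normal initializer resolves in its environment:

import cntk as C
from cntk.layers import BatchNormalization, Convolution2D, Dense

label_dim = 10                   # assumed number of condition classes
x = C.input_variable(784)        # flattened 28x28 image
y = C.input_variable(label_dim)  # one-hot condition vector
d = cgan_discriminator(x, y)
assert d.shape == (1,)           # a single real/fake probability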
Example #24
def UpSampling2D(x):
    xr = C.reshape(x, (x.shape[0], x.shape[1], 1, x.shape[2], 1))
    xx = C.splice(xr, xr, axis=-1)  # axis=-1 refers to the last axis
    xy = C.splice(xx, xx, axis=-3)  # axis=-3 refers to the middle axis
    r = C.reshape(xy, (x.shape[0], x.shape[1] * 2, x.shape[2] * 2))
    '''
    print ("upsampling")
    print(xr.shape)
    print(xx.shape)
    print(xy.shape)
    print(r.shape)
    '''
    return r
Example #25
def LocalResponseNormalization(k, n, alpha, beta, name=''):
    x = C.placeholder(name='lrn_arg')
    x2 = C.square(x)
    # reshape to insert a fake singleton reduction dimension after the 3rd axis (the channel axis). Note that Python and BrainScript axis orders are reversed.
    x2s = C.reshape(x2, (1, C.InferredDimension), 0, 1)
    W = C.constant(alpha / (2 * n + 1), (1, 2 * n + 1, 1, 1), name='W')
    # 3D convolution with a filter that has a non-1 size only in the 3rd axis; it does not reduce, since the reduction dimension is fake and of size 1
    y = C.convolution(W, x2s)
    # reshape back to remove the fake singleton reduction dimension
    b = C.reshape(y, C.InferredDimension, 0, 2)
    den = C.exp(beta * C.log(k + b))
    apply_x = C.element_divide(x, den)
    return apply_x
Example #26
    def var(array,W=_W,B=None,square=0,sqrt=0,V=False,sizz=0):
        #W=tf.transpose(W, [0,2,3,1])
        
        arrs=array.shape
        ashp=W.shape
        sb=(W.shape[1],1,1)
        WV=W.shape[-2:]
        xi=(-2,-1)
        x2=(-2,-1,-3)

        if V:
            print(W.eval())
            print(arrs,ashp)
        mul=(array*W)

        if V:
            print('Wsamp',W[-1,-1].eval())
            print('array*w',(mul.eval())[0,-1])

        size=C.reduce_sum(W,axis=xi)#shape=(outputs, channel)

        if V:
            print("sizesamp",size.shape,size.eval())
        if B is None:
            B=C.constant(0,shape=W.shape[0:2],dtype=np.float32)#channel
        B=C.reshape(B,(*B.shape,*[1 for _ in range(len(ashp)-len(B.shape))]))
        if sizz==1:
            mean=C.reduce_sum(mul,axis=xi)/size
        else:
            mean=C.reduce_sum(mul,axis=xi)/C.constant(value=WV[0]*WV[1],shape=sb,dtype=np.float32)
        if V:
            print("meansamp",mean.eval()[0,-1])
        if square:
            i=(C.square(mul-mean)+B)
        else:
            i=(((mul)-mean)+B)
        di=i/size
        if V==2:
            print("i",i.eval(),"i")
            print("di",di.eval(),"di")
        if V:
            print('isamp',i.shape,i.eval()[-1,-1,])
        out=C.reduce_sum(i+B,axis=x2)
        #out=np.rollaxis(np.sum(i+B,axis=x2),-1,1)
        print(out.shape)
        if sqrt:
            out=C.sqrt(out)
        out=C.swapaxes(C.reshape(out,out.shape[:4]), 3, 1)
        print(out.shape)
        assert out.shape==(arrs[0],ashp[0],arrs[1],arrs[2])
        return(out)
Example #27
def linear_units(input_var, output_dim):
    input_dim = input_var.shape[0]
    # Introduce model parameters
    weight_param = C.parameter(shape=(output_dim, input_dim), name="weights")
    bias_param = C.parameter(shape=(output_dim, 1), name="biases")
    # Reshape to facilitate matrix multiplication
    input_reshaped = C.reshape(input_var, (input_dim, 1))
    # Stash the parameters in the module-level params dict
    params['w'], params['b'] = weight_param, bias_param
    # Weighted sums
    part1 = C.times(weight_param, input_reshaped)
    # Add biases
    part2 = part1 + bias_param
    # Return 1-D representation
    return C.reshape(part2, (num_classes))
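The excerpt stores its parameters in a module-level params dict and reads num_classes from module scope; a hedged sketch supplying both (the values are assumptions):

import cntk as C

params = {}
num_classes = 10
features = C.input_variable(784)
z = linear_units(features, num_classes)
assert z.shape == (num_classes,)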
Example #28
    def attention_layer(self, context, query):
        q_processed = C.placeholder(shape=(2 * self.hidden_dim, ))
        c_processed = C.placeholder(shape=(2 * self.hidden_dim, ))

        #convert query's sequence axis to static
        qvw, qvw_mask = C.sequence.unpack(q_processed, padding_value=0).outputs

        # This part deserves some explanation
        # It is the attention layer
        # In the paper they use a 6 * dim dimensional vector
        # here we split it in three parts because the different parts
        # participate in very different operations
        # so W * [h; u; h.* u] becomes w1 * h + w2 * u + w3 * (h.*u)
        ws1 = C.parameter(shape=(2 * self.hidden_dim, 1),
                          init=C.glorot_uniform())
        ws2 = C.parameter(shape=(2 * self.hidden_dim, 1),
                          init=C.glorot_uniform())
        ws3 = C.parameter(shape=(1, 2 * self.hidden_dim),
                          init=C.glorot_uniform())
        att_bias = C.parameter(shape=(), init=0)

        wh = C.times(c_processed, ws1)
        wu = C.reshape(C.times(qvw, ws2), (-1, ))
        whu = C.reshape(
            C.reduce_sum(c_processed *
                         C.sequence.broadcast_as(qvw * ws3, c_processed),
                         axis=1), (-1, ))
        S = wh + whu + C.sequence.broadcast_as(wu, c_processed) + att_bias
        # mask out values outside of Query, and fill in gaps with -1e+30 as neutral value for both reduce_log_sum_exp and reduce_max
        qvw_mask_expanded = C.sequence.broadcast_as(qvw_mask, c_processed)
        S = C.element_select(qvw_mask_expanded, S, C.constant(-1e+30))
        q_attn = C.reshape(C.softmax(S), (-1, 1))
        #q_attn = print_node(q_attn)
        c2q = C.reshape(
            C.reduce_sum(C.sequence.broadcast_as(qvw, q_attn) * q_attn,
                         axis=0), (-1))

        max_col = C.reduce_max(S)
        c_attn = C.sequence.softmax(max_col)

        htilde = C.sequence.reduce_sum(c_processed * c_attn)
        q2c = C.sequence.broadcast_as(htilde, c_processed)
        q2c_out = c_processed * q2c

        att_context = C.splice(c_processed, c2q, c_processed * c2q, q2c_out)

        return C.as_block(att_context, [(c_processed, context),
                                        (q_processed, query)],
                          'attention_layer', 'attention_layer')
Example #29
def test_op_reshape(input_shape, output_shape, expected_output_shape,
                    device_id, precision):
    # Forward pass test
    #==================
    # we compute the expected output for the forward pass
    # we need two surrounding brackets
    # the first for sequences (length=1, since we have dynamic_axis='')
    # the second for batch of one sample

    num_tensor_elements = np.multiply.reduce(input_shape)
    input_tensor = np.arange(num_tensor_elements).reshape(input_shape)

    expected_tensor = input_tensor.reshape(expected_output_shape, order='C')

    a = I([input_tensor])

    # reshape into output shape

    reshaped_input = C.reshape(a, output_shape)

    unittest_helper(reshaped_input,
                    None, [[expected_tensor]],
                    device_id=device_id,
                    precision=precision,
                    clean_up=True,
                    backward_pass=False)

    # Backward pass test
    # ==================
    # Reshaping is just moving the input values to different indexes of the result tensor.
    # If we computed the gradients on the unmodified reshape, we would get 1 for all inputs.
    # For testing the gradients we want a different gradient for each input index; otherwise we can't
    # tell whether they get wrongly permuted. To this end we multiply the reshaped result with a constant
    # equal to its own expected value, so the expected gradient is identical to the input tensor.

    a = I([input_tensor])

    # reshape into output shape
    reshaped_input = C.reshape(a, output_shape)

    output = reshaped_input * expected_tensor

    unittest_helper(output,
                    None, [[input_tensor]],
                    device_id=device_id,
                    precision=precision,
                    clean_up=True,
                    backward_pass=True,
                    input_node=a)
Example #30
    def MyMeanVarNorm(feature_mean_file, feature_inv_stddev_file):
        m = C.reshape(load_ascii_vector(feature_mean_file, 'feature_mean'),
                      shape=(1, feature_dim))
        s = C.reshape(load_ascii_vector(feature_inv_stddev_file,
                                        'feature_invstddev'),
                      shape=(1, feature_dim))

        def _func(operand):
            return C.reshape(C.element_times(
                C.reshape(operand,
                          shape=(1 + context[0] + context[1], feature_dim)) -
                m, s),
                             shape=operand.shape)

        return _func
Example #31
    def scale_dot_product_attention_block(self, contextQ, contextV, contextK,
                                          name):

        Q = C.placeholder(shape=(2 * self.hidden_dim, ),
                          dynamic_axes=[self.b_axis, self.q_axis])
        V = C.placeholder(shape=(2 * self.hidden_dim, ),
                          dynamic_axes=[self.b_axis, self.q_axis])
        K = C.placeholder(shape=(2 * self.hidden_dim, ),
                          dynamic_axes=[self.b_axis, self.q_axis])

        Ql = C.layers.Dense(100)(Q)
        Vl = C.layers.Dense(100)(V)
        Kl = C.layers.Dense(100)(K)

        kvw, kvw_mask = C.sequence.unpack(Kl, padding_value=0).outputs
        vvw, _ = C.sequence.unpack(Vl, padding_value=0).outputs
        KT = C.swapaxes(kvw)

        S = C.reshape(C.times(Ql, KT) / math.sqrt(100), -1)
        kvw_mask_expanded = C.sequence.broadcast_as(kvw_mask, Ql)
        S = C.softmax(
            C.element_select(kvw_mask_expanded, S, C.constant(-1e+30)))
        att = C.times(S, vvw)

        return C.as_block(att, [(Q, contextQ), (V, contextV),
                                (K, contextK)], 'sdp_attention_block' + name,
                          'sdp_attention_block' + name)
Example #32
    def attention(encoded, network):
        abk = dense(network)
        a, b, k = gaussian_windows_attention_coefficients(abk, nb_mixtures)
        # print("abk shape:", a.shape, b.shape, k.shape)
        # a, b, k: [#, n] [nb_mixture, 1]
        # context: [#, c] [char_ohe]

        encoded_unpacked = C.sequence.unpack(encoded, padding_value=0, no_mask_output=True)
        # context_unpacked: [#] [*=c, char_ohe]
        u = Cx.sequence.position(encoded)  # position gives shape=(1, )
        # u: [#, c], [1]
        u_values, u_valid = C.sequence.unpack(u, padding_value=999_999).outputs
        # u_values: [#] [*=c, 1]
        # u_valid: [#] [*=c]
        u_values_broadcast = C.swapaxes(C.sequence.broadcast_as(u_values, k))
        # u_values_broadcast: [#, n] [1, *=c]
        u_valid_broadcast = C.sequence.broadcast_as(C.reshape(u_valid, (1,), 1), k)
        # u_valid_broadcast: [#, n] [*=c, 1] ~ shape verified correct at this point

        # print("u_values_broadcast shape:", u_values_broadcast.shape)
        # print("abk shape:", a.shape, b.shape, k.shape)
        phi = window_weight(a, b, k, u_values_broadcast)
        # phi: [#, n] [*=c, 1]
        zero = C.constant(0)
        phi = C.element_select(u_valid_broadcast, phi, zero, name="phi")
        # phi: [#, n] [*=c, 1]
        attended = C.reduce_sum(phi * C.sequence.broadcast_as(encoded_unpacked, phi), axis=0)
        # [#, n] [1, char_ohe]
        # print("attended_context shape:", attended_context.shape)
        output = C.squeeze(attended, name="GaussianWindowAttention")
        # [#, n] [char_ohe]
        return output
Example #33
def test_cntk_conv2d():
    try:
        import tensorflow
        has_tensorflow = True
    except ImportError:
        has_tensorflow = False

    if has_tensorflow:
        tf_baseline_conv2d()
    else:
        cntk_baseline_conv2d()

    import cntk as C
    import cntk.contrib.crosstalk.crosstalk_cntk as crct
    ci = crct.instance

    input_var = C.input_variable(shape=sample_shape)
    input_reshaped = C.reshape(input_var, (1,)+sample_shape)
    conv_out = C.layers.Convolution2D(filter_shape, num_filters, activation=None)(input_reshaped)
    ci.watch(conv_out, 'conv2d', var_type=cstk.Conv2DAttr,
              attr=cstk.Conv2DAttr(filter_shape=filter_shape, num_filters=num_filters))
    ci.watch(conv_out, 'conv2d_out')

    data = {input_var:input_data}
    ci.set_data(data)
    ci.set_workdir(workdir)
    conv_out.eval(data)

    # load parameters from crosstalk and verify results are the same
    ci.assign('conv2d', load=True)
    assert ci.compare('conv2d_out', rtol=1e-4, atol=1e-6)

    ci.reset()
Example #34
def test_sequential_convolution_without_reduction_dim():
    c = Convolution(3, init=np.array([4., 2., 1.], dtype=np.float32), sequential=True, pad=False, reduction_rank=0, bias=False)
    c.update_signature(Sequence[Tensor[()]])  # input is a sequence of scalars
    data = [np.array([2., 6., 4., 8., 6.])]   # like a short audio sequence, in the dynamic dimension
    out = c(data)
    exp = [[24., 40., 38.]]
    np.testing.assert_array_equal(out, exp, err_msg='Error in sequential convolution without reduction dimension')

    c = Convolution(3, init=np.array([4., 2., 1.], dtype=np.float32), sequential=True, pad=False, reduction_rank=0, bias=False)
    c.update_signature(Sequence[Tensor[1]]) # input is a sequence of dim-1 vectors
    data = [np.array([[2.], [6], [4.], [8.], [6.]])]
    out = c(data)
    exp = [[[24.], [40.], [38]]] # not reducing; hence, output is also a sequence of dim-1 vectors
    np.testing.assert_array_equal(out, exp, err_msg='Error in sequential convolution without reduction dimension')

    # these cases failed before
    emb_dim = 10
    x = C.input_variable(**Sequence[Tensor[20]])
    m = Embedding(emb_dim)(x)
    m = Convolution(filter_shape=3, sequential=True)(m)

    # this one still fails
    # Reshape: Operand (sub-)dimensions '[3]' incompatible with desired replacement (sub-)dimensions '[]'. Number of elements must be the same..
    m = Embedding(emb_dim)(x)
    m = reshape(m, (emb_dim,1))
    m = Convolution(filter_shape=(3,1), num_filters=13, pad=True, sequential=True)(m)

    m = Embedding(emb_dim)(x)
    m = Convolution(filter_shape=3, pad=True, sequential=True)(m)
Example #35
def create_model(input_sequence, label_sequence, vocab_dim, hidden_dim):
    # Create the rnn that computes the latent representation for the next token.
    rnn_with_latent_output = Sequential([
        C.layers.Embedding(hidden_dim),
        For(
            range(num_layers), lambda: Sequential([
                Stabilizer(),
                Recurrence(LSTM(hidden_dim), go_backwards=False)
            ])),
    ])

    # Apply it to the input sequence.
    latent_vector = rnn_with_latent_output(input_sequence)

    # Connect the latent output to (sampled/full) softmax.
    if use_sampled_softmax:
        weights = load_sampling_weights(token_frequencies_file_path)
        smoothed_weights = np.float32(np.power(weights, alpha))
        sampling_weights = C.reshape(C.Constant(smoothed_weights),
                                     shape=(1, vocab_dim))
        z, ce, errs = cross_entropy_with_sampled_softmax(
            latent_vector, label_sequence, vocab_dim, hidden_dim,
            softmax_sample_size, sampling_weights)
    else:
        z, ce, errs = cross_entropy_with_full_softmax(latent_vector,
                                                      label_sequence,
                                                      vocab_dim, hidden_dim)

    return z, ce, errs
Example #36
    def input_layer(self, embed, cgw,cnw,cc,qgw,qnw,qc):
        cgw_ph = C.placeholder()
        cnw_ph = C.placeholder()
        cc_ph  = C.placeholder()
        qgw_ph = C.placeholder()
        qnw_ph = C.placeholder()
        qc_ph  = C.placeholder()
  
        
        input_chars = C.placeholder(shape=(1,self.word_size,self.c_dim))
        input_glove_words = C.placeholder(shape=(self.wg_dim,))
        input_nonglove_words = C.placeholder(shape=(self.wn_dim,))

        # we need to reshape because GlobalMaxPooling/reduce_max is retaining a trailing singleton dimension
        # todo GlobalPooling/reduce_max should have a keepdims default to False
        embedded = C.splice(
            C.reshape(self.charcnn(input_chars), self.convs),
            embed(input_glove_words, input_nonglove_words), name='splice_embed')
        highway = HighwayNetwork(dim=2*self.hidden_dim, highway_layers=self.highway_layers)(embedded)
        highway_drop = C.layers.Dropout(self.dropout)(highway)
        processed = OptimizedRnnStack(self.hidden_dim, bidirectional=True, use_cudnn=self.use_cudnn, name='input_rnn')(highway_drop)
        
        qce = C.one_hot(qc_ph, num_classes=self.c_dim, sparse_output=self.use_sparse)
        cce = C.one_hot(cc_ph, num_classes=self.c_dim, sparse_output=self.use_sparse)
#        ace = C.one_hot(ac_ph, num_classes=self.c_dim, sparse_output=self.use_sparse)
                
        q_processed = processed.clone(C.CloneMethod.share, {input_chars:qce, input_glove_words:qgw_ph, input_nonglove_words:qnw_ph})
        c_processed = processed.clone(C.CloneMethod.share, {input_chars:cce, input_glove_words:cgw_ph, input_nonglove_words:cnw_ph})

        return C.as_block(
            C.combine([c_processed, q_processed]),
            [(cgw_ph, cgw),(cnw_ph, cnw),(cc_ph, cc),(qgw_ph, qgw),(qnw_ph, qnw),(qc_ph, qc)],
            'input_layer',
            'input_layer')
Example #37
    def input_layer(self,cgw,cnw,cc,qgw,qnw,qc):
        cgw_ph = C.placeholder()
        cnw_ph = C.placeholder()
        cc_ph  = C.placeholder()
        qgw_ph = C.placeholder()
        qnw_ph = C.placeholder()
        qc_ph  = C.placeholder()

        input_chars = C.placeholder(shape=(1,self.word_size,self.c_dim))
        input_glove_words = C.placeholder(shape=(self.wg_dim,))
        input_nonglove_words = C.placeholder(shape=(self.wn_dim,))

        # we need to reshape because GlobalMaxPooling/reduce_max is retaining a trailing singleton dimension
        # todo GlobalPooling/reduce_max should have a keepdims default to False
        embedded = C.splice(
            C.reshape(self.charcnn(input_chars), self.convs),
            self.embed()(input_glove_words, input_nonglove_words), name='splice_embed')
        processed = C.layers.Sequential([For(range(2), lambda: OptimizedRnnStack(self.hidden_dim, bidirectional=True, use_cudnn=self.use_cudnn, name='input_rnn'))])(embedded)
        
        qce = C.one_hot(qc_ph, num_classes=self.c_dim, sparse_output=self.use_sparse)
        cce = C.one_hot(cc_ph, num_classes=self.c_dim, sparse_output=self.use_sparse)
        
        q_processed = processed.clone(C.CloneMethod.share, {input_chars:qce, input_glove_words:qgw_ph, input_nonglove_words:qnw_ph})
        c_processed = processed.clone(C.CloneMethod.share, {input_chars:cce, input_glove_words:cgw_ph, input_nonglove_words:cnw_ph})
        return C.as_block(
            C.combine([c_processed, q_processed]),
            [(cgw_ph, cgw),(cnw_ph, cnw),(cc_ph, cc),(qgw_ph, qgw),(qnw_ph, qnw),(qc_ph, qc)],
            'input_layer',
            'input_layer')
Example #38
def test_sequence_unpack_with_convolution(device_id, precision): 
    x = C.sequence.input((20, 20))
    y = C.sequence.unpack(x, 0, no_mask_output=True)
    z = C.reshape(y, (3, 20, 20))
    kernel = C.constant(1.0, (4, 3, 3, 3))
    t = C.convolution(kernel, z, auto_padding=[False, True, True])
    val = np.random.random((2, 3, 20, 20)).astype(np.float32)
    result = t.eval({x: val})
    assert np.array_equal(result.shape, (2, 4, 20, 20))
Example #39
def test_op_reshape(inputShape, outputShape, expectedOutputShape, device_id, precision):
    # Forward pass test
    #==================
    # we compute the expected output for the forward pass
    # we need two surrounding brackets
    # the first for sequences (length=1, since we have dynamic_axis='')
    # the second for batch of one sample
                        
    num_tensor_elements = np.multiply.reduce(inputShape)
    input_tensor = np.arange(num_tensor_elements).reshape(inputShape)
        
    expected_tensor = input_tensor.reshape(expectedOutputShape, order='F')

    a = I([input_tensor])

    # reshape into output shape
    reshaped_input = C.reshape(a, outputShape)

    unittest_helper(reshaped_input, None, [[expected_tensor]], device_id=device_id, 
                precision=precision, clean_up=True, backward_pass=False)

    # Backward pass test
    # ==================
    # Reshaping is just moving the input values to different indexes of the result tensor.
    # If we computed the gradients on the unmodified reshape, we would get 1 for all inputs.
    # For testing the gradients we want a different gradient for each input index; otherwise we can't
    # tell whether they get wrongly permuted. To this end we multiply the reshaped result with some weight tensor.
    # For convenience, choose '100 * expected_tensor' as the weight.
    # The expected gradient is identical to this weight tensor reshaped according to the input shape.

    a = I([input_tensor])

    # reshape into output shape
    reshaped_input = C.reshape(a, outputShape)

    some_factor = 100
    weight =  expected_tensor * some_factor

    output = reshaped_input * weight
    expected_gradient = input_tensor * some_factor 
    
    unittest_helper(output, None, [[expected_gradient]], device_id = device_id,
                    precision=precision, clean_up=True, backward_pass=True, input_node=a)
Example #40
def test_op_reshape_free_dimension(device_id):
    dev = cntk_device(device_id)
    x = C.input_variable((C.FreeDimension, 2, 2))

    x_reshaped_1 = C.reshape(x, (-1,), 0, 2)
    data = [[[1, 2], [3, 4]]]
    result = x_reshaped_1.eval({x : np.asarray(data, dtype=np.float32)})
    assert np.array_equal(result[0], data[0])
    data = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
    result = x_reshaped_1.eval({x : np.asarray(data, dtype=np.float32)})
    assert np.array_equal(result[0], np.reshape(data, (4, 2)))

    x_reshaped_2 = C.reshape(x, (-1,), 1, 3)
    data = [[[1, 2], [3, 4]]]
    result = x_reshaped_2.eval({x : np.asarray(data, dtype=np.float32)})
    assert np.array_equal(result[0], np.reshape(data, (1, 4)))
    data = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
    result = x_reshaped_2.eval({x : np.asarray(data, dtype=np.float32)})
    assert np.array_equal(result[0], np.reshape(data, (2, 4)))
Example #41
def test_depth_to_space(image_shape, num_channels, block_size, device_id, precision):
    dev = cntk_device(device_id)
    from cntk.internal import sanitize_dtype_cntk

    input_val = np.array(np.reshape(range(num_channels), (num_channels, 1, 1)), dtype=PRECISION_TO_TYPE[precision])
    input_val = np.tile(input_val, (1,) + image_shape)
    img = C.input_variable((num_channels,) + image_shape, dtype=sanitize_dtype_cntk(PRECISION_TO_TYPE[precision]))

    # Result from depth_to_space node.
    depth_to_space_op = C.depth_to_space(img, block_size)
    output_test = depth_to_space_op.eval({ img : input_val })

    # Reference result from simulating depth_to_space with other CNTK ops.
    h, w = image_shape
    reshape_node = C.reshape(img, (block_size, block_size, num_channels // (block_size**2), h, w))
    transpose_node = C.transpose(reshape_node, [2, 3, 0, 4, 1])
    depth_to_space_sim_op = C.reshape(transpose_node, (num_channels // (block_size**2), h * block_size, w * block_size))
    output_ref = depth_to_space_sim_op.eval({ img : input_val })

    assert np.array_equal(output_test, output_ref)
Example #42
def test_sequence_unpack_with_convolution(device_id, precision):
    dt = PRECISION_TO_TYPE[precision]
    dev = cntk_device(device_id)

    x = C.sequence.input((20, 20), dtype=dt)
    y = C.sequence.unpack(x, 0, no_mask_output=True)
    z = C.reshape(y, (3, 20, 20))
    kernel = C.constant(1.0, (4, 3, 3, 3), device=dev)
    t = C.convolution(kernel, z, auto_padding=[False, True, True])
    val = np.random.random((2, 3, 20, 20)).astype(dt)
    result = t.eval({x: val}, device=dev)
    assert np.array_equal(result.shape, (2, 4, 20, 20))
Example #43
def cross_entropy_with_sampled_softmax(
    hidden_vector,           # Node providing the output of the recurrent layers
    target_vector,           # Node providing the expected labels (as sparse vectors)
    vocab_dim,               # Vocabulary size
    hidden_dim,              # Dimension of the hidden vector
    num_samples,             # Number of samples to use for sampled softmax
    sampling_weights,        # Node providing weights to be used for the weighted sampling
    allow_duplicates = False # Boolean flag to control whether to use sampling with replacement (allow_duplicates == True) or without replacement.
    ):
    bias = C.Parameter(shape = (vocab_dim, 1), init = 0)
    weights = C.Parameter(shape = (vocab_dim, hidden_dim), init = C.initializer.glorot_uniform())

    sample_selector_sparse = C.random_sample(sampling_weights, num_samples, allow_duplicates) # sparse matrix [num_samples * vocab_size]
    if use_sparse:
        sample_selector = sample_selector_sparse
    else:
        # Note: Sampled softmax with dense data is only supported for debugging purposes.
        # It might easily run into memory issues as the matrix 'I' below might be quite large.
        # If we want a dense representation for all data, we have to convert the sample selector accordingly.
        I = C.Constant(np.eye(vocab_dim, dtype=np.float32))
        sample_selector = C.times(sample_selector_sparse, I)

    inclusion_probs = C.random_sample_inclusion_frequency(sampling_weights, num_samples, allow_duplicates) # dense row [1 * vocab_size]
    log_prior = C.log(inclusion_probs) # dense row [1 * vocab_dim]


    print("hidden_vector: "+str(hidden_vector.shape))
    wS = C.times(sample_selector, weights, name='wS') # [num_samples * hidden_dim]
    print("ws:"+str(wS.shape))
    zS = C.times_transpose(wS, hidden_vector, name='zS1') + C.times(sample_selector, bias, name='zS2') - C.times_transpose (sample_selector, log_prior, name='zS3')# [num_samples]

    # Getting the weight vector for the true label. Dimension hidden_dim
    wT = C.times(target_vector, weights, name='wT') # [1 * hidden_dim]
    zT = C.times_transpose(wT, hidden_vector, name='zT1') + C.times(target_vector, bias, name='zT2') - C.times_transpose(target_vector, log_prior, name='zT3') # [1]


    zSReduced = C.reduce_log_sum_exp(zS)

    # Compute the cross entropy that is used for training.
    # We don't check whether any of the classes in the random samples coincides with the true label, so it might happen that the true class is counted
    # twice in the normalizing denominator of sampled softmax.
    cross_entropy_on_samples = C.log_add_exp(zT, zSReduced) - zT

    # For applying the model we also output a node providing the input for the full softmax
    z = C.times_transpose(weights, hidden_vector) + bias
    z = C.reshape(z, shape = (vocab_dim))

    zSMax = C.reduce_max(zS)
    error_on_samples = C.less(zT, zSMax)
    return (z, cross_entropy_on_samples, error_on_samples)
Example #44
def CustomMultibitKernel(input, bit_map, mean_bits=None):
    if (mean_bits):
        bit_map = np.asarray(np.maximum(np.round(np.random.normal(mean_bits, 1, input.shape)), 1), dtype=np.int32)
        print("Mean Bits: ",np.mean(bit_map))
    else:
        if (type(bit_map) == int):
            length = C.reshape(input, (-1))
            bit_map = [bit_map]*length.shape[0]
            bit_map = np.asarray(bit_map)
            bit_map = bit_map.reshape(input.shape)
        else:
            bit_map = np.asarray(bit_map)
    assert (bit_map.shape == input.shape)
    return user_function(MultibitKernel(input, bit_map))
Example #45
def test_convert_dynamic_axis():
    #test fix batch size
    batch_size = 4
    a = C.parameter(shape=(batch_size, 2, 3), init=1)
    dynamic_a = C.to_batch(a)
    assert len(dynamic_a.dynamic_axes) == 1
    assert dynamic_a.shape == (2, 3)

    x = C.input_variable((2, 3))
    y = x * dynamic_a

    #test grad
    data = np.arange(batch_size * 2 * 3).reshape(batch_size, 2, 3).astype('f')
    assert np.array_equal(y.grad({x:data}, [a]), data)

    const_a = C.unpack_batch(y)
    assert len(const_a.dynamic_axes) == 0
    assert const_a.shape == (C.FreeDimension, 2, 3)

    f = C.assign(a, const_a)
    f.eval({x:data})
    assert np.array_equal(a.value, data)

    #test reshape with batch axis
    x = C.input_variable((2,3))
    const_x = C.unpack_batch(x)
    assert len(const_x.dynamic_axes) == 0
    assert const_x.shape == (C.FreeDimension, 2, 3)

    const_y = C.reshape(const_x, (-1, 3))
    assert const_y.shape == (C.FreeDimension, 3)
    y = C.to_batch(const_y)
    assert len(y.dynamic_axes) == 1
    assert y.shape == (3,)

    z = y * 2
    expected = data.reshape((8, 3)) * 2
    assert np.array_equal(z.eval({x:data}), expected)

    #test inferred dimension
    x = C.input_variable((C.InferredDimension, 3))
    const_x = C.unpack_batch(x)
    assert len(const_x.dynamic_axes) == 0
    assert const_x.shape == (C.FreeDimension, C.InferredDimension, 3)

    const_y = const_x * 2
    y = C.to_batch(const_y)
    assert len(y.dynamic_axes) == 1
    assert y.shape == (C.InferredDimension, 3)
Example #46
def cross_entropy_with_full_softmax(
    hidden_vector,  # Node providing the output of the recurrent layers
    target_vector,  # Node providing the expected labels (as sparse vectors)
    vocab_dim,      # Vocabulary size
    hidden_dim      # Dimension of the hidden vector
    ):
    bias = C.Parameter(shape = (vocab_dim, 1), init = 0)
    weights = C.Parameter(shape = (vocab_dim, hidden_dim), init = C.initializer.glorot_uniform())

    z = C.reshape(C.times_transpose(weights, hidden_vector) + bias, (1,vocab_dim))
    zT = C.times_transpose(z, target_vector)
    ce = C.reduce_log_sum_exp(z) - zT
    zMax = C.reduce_max(z)
    error_on_samples = C.less(zT, zMax)
    return (z, ce, error_on_samples)
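A minimal wiring sketch for the full-softmax helper above; the sizes are assumptions, and the hidden vector is a sequence as in the create_model examples in this list:

import cntk as C

hidden_dim, vocab_dim = 5, 8
hidden = C.sequence.input_variable(hidden_dim)
target = C.sequence.input_variable(vocab_dim)
z, ce, err = cross_entropy_with_full_softmax(hidden, target, vocab_dim, hidden_dim)
# z: [#,*](1, vocab_dim), per the reshape above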
Example #47
def test_op_times_sparse_grad(device_id, precision):
    dt_precision = PRECISION_TO_TYPE[precision]

    from cntk import times, times_transpose, parameter, reshape, Value, sequence
    dim = 5
    num_sequences = 2
    seq = [i for i in range(dim)]
    identity = np.identity(dim, dtype=dt_precision)
    input_data = Value.one_hot([seq]*num_sequences, dim, dtype=dt_precision)
    input_var  = sequence.input_variable(shape=(dim), is_sparse=True, needs_gradient=False, dtype=dt_precision)
    e = parameter(shape = (dim, dim), init = identity, dtype=dt_precision)
    z = reshape(times_transpose(e, times(input_var, e)), dim)
    e_grad = z.grad({input_var : input_data}, [e])
    
    assert np.allclose(e_grad, np.ones((dim,dim))*4)
Example #48
def cntk_baseline_conv2d():
    import cntk as C
    import cntk.contrib.crosstalk.crosstalk_cntk as crct
    ci = crct.instance
    input_var = C.input_variable(shape=sample_shape)
    input_reshaped = C.reshape(input_var, (1,)+sample_shape)
    conv_out = C.layers.Convolution2D(filter_shape, num_filters, init_bias=C.glorot_uniform())(input_reshaped)
    ci.watch(conv_out, 'conv2d', var_type=cstk.Conv2DAttr,
              attr=cstk.Conv2DAttr(filter_shape=filter_shape, num_filters=num_filters))
    ci.watch(conv_out, 'conv2d_out')
    data = {input_var:input_data}
    ci.set_data(data)
    ci.set_workdir(workdir)
    ci.fetch('conv2d', save=True)
    ci.fetch('conv2d_out', save=True)
    ci.reset()
Example #49
def test_data_resize():
    batch_size = 8
    w = C.parameter(shape=(3, 2), name='w1')
    x = C.input_variable(shape=[3], name='x')
    y = C.softmax(C.times(x, w))
    y = C.unpack_batch(y)
    y = C.reshape(y, [batch_size * 2])
    loss = C.reduce_mean(-C.log(y))

    learning_rate = 0.01
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    learner = C.sgd(y.parameters, lr_schedule, gradient_clipping_threshold_per_sample=1.0)
    trainer = C.Trainer(y, (loss), [learner])

    features = np.random.randn(batch_size, 3)
    trainer.train_minibatch({x: features})
Example #50
def test_conv_free_static_with_sequence_unpack(num_features, sequence_len, filter_size, num_output_channels, batch_size, device_id, precision):
    dt = PRECISION_TO_TYPE[precision]
    dev = cntk_device(device_id)

    x_ref = C.input_variable((1, sequence_len, num_features), dtype=dt)
    conv_map_ref = C.constant(np.random.randn(num_output_channels, 1, filter_size[0], filter_size[1]).astype(dt), device=dev)
    w2_ref = C.convolution(conv_map_ref, x_ref, auto_padding=[False])
    x0_ref = np.arange(batch_size*1*sequence_len*num_features).astype(dt).reshape(batch_size, 1, sequence_len, num_features)
    output_ref = w2_ref.eval({x_ref: x0_ref}, device=dev)


    x_test = C.sequence.input_variable(num_features, dtype=dt)
    y_test, mask_test = C.sequence.unpack(x_test, 0).outputs
    z_test = C.reshape(y_test, (1, ), 0, 0)
    w2_test = C.convolution(conv_map_ref, z_test, auto_padding=[False])
    output_test = w2_test.eval({x_test: np.squeeze(x0_ref)}, device=dev)
    
    assert np.allclose(output_test, output_ref, atol=1e-4)
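# The key step above is C.reshape(y_test, (1,), 0, 0): as I read CNTK's
# reshape, it replaces the empty axis span [0, 0) with the given shape,
# i.e. it prepends a singleton channel axis to the unpacked sequence so the
# convolution sees the same layout as the dense reference input. A NumPy
# analogue (shapes illustrative):
import numpy as np
y = np.zeros((7, 4), dtype=np.float32)     # unpacked: (max_seq_len, num_features)
z = y.reshape((1,) + y.shape)              # (1, 7, 4), ready for convolution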
Example #51
    def multiFunc(self, arg1):
        multiIn = C.input(shape=arg1.shape, dynamic_axes=arg1.dynamic_axes)
        bit_map = C.constant(self.bit_map)
        max_bits = self.bit_map.max()
        shape = multiIn.shape
        reformed = C.reshape(multiIn, (-1,))
        carry_over = multiIn
        approx = C.element_times(multiIn, 0)
        for i in range(max_bits):
            hot_vals = C.greater(bit_map, i)
            valid_vals = C.element_select(hot_vals, carry_over, 0)
            mean = C.element_divide(C.reduce_sum(C.abs(valid_vals)), C.reduce_sum(hot_vals))
            bits = C.greater(carry_over, 0)
            bits = C.element_select(bits, bits, -1)
            bits = C.element_select(hot_vals, bits, 0)
            approx = C.plus(approx, C.element_times(mean, bits))
            carry_over = C.plus(C.element_times(C.element_times(-1, bits), mean), carry_over)

        return approx, multiIn
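# The loop above is residual ("multi-bit") binarization: on each pass the
# still-active entries of the residual are approximated by
# sign(residual) * mean(|residual|), which is added to the running
# approximation and subtracted from the residual. A NumPy sketch for the
# simple case where every entry uses max_bits bits (values illustrative):
import numpy as np
x = np.array([0.9, -0.4, 0.2, -1.1], dtype=np.float32)
approx, carry = np.zeros_like(x), x.copy()
for _ in range(2):                         # max_bits = 2
    mean = np.abs(carry).mean()            # per-pass scale
    bits = np.where(carry > 0, 1.0, -1.0)  # sign bits, as in element_select
    approx += mean * bits
    carry -= mean * bits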
Example #52
def create_binary_convolution_model():

    # Input variables denoting the features and label data
    feature_var = C.input((num_channels, image_height, image_width))
    label_var = C.input((num_classes))

    # apply model to input
    scaled_input = C.element_times(C.constant(0.00390625), feature_var)

    # first layer is ok to be full precision
    z = C.layers.Convolution((3, 3), 32, pad=True, activation=C.relu)(scaled_input)
    z = C.layers.MaxPooling((3,3), strides=(2,2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (3,3), 128, channels=32, pad=True)
    z = C.layers.MaxPooling((3,3), strides=(2,2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (3,3), 128, channels=128, pad=True)
    z = C.layers.MaxPooling((3,3), strides=(2,2))(z)

    z = C.layers.BatchNormalization(map_rank=1)(z)
    z = BinaryConvolution(z, (1,1), num_classes, channels=128, pad=True)
    z = C.layers.AveragePooling((z.shape[1], z.shape[2]))(z)
    z = C.reshape(z, (num_classes,))

    # Add binary regularization (ala Gang Hua)
    weight_sum = C.constant(0)
    for p in z.parameters:
        if (p.name == "filter"):
            weight_sum = C.plus(weight_sum, C.reduce_sum(C.minus(1, C.square(p))))
    bin_reg = C.element_times(.000005, weight_sum)

    # After the last layer, we need to apply a learnable scale
    SP = C.parameter(shape=z.shape, init=0.001)
    z = C.element_times(z, SP)

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    ce = C.plus(ce, bin_reg)
    pe = C.classification_error(z, label_var)

    return C.combine([z, ce, pe])
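# The regularizer above, sum(1 - w^2) over the binary-filter parameters, is
# zero when a weight sits exactly at +/-1 and positive for weights inside
# (-1, 1), so it nudges the full-precision shadow weights toward binary
# values. In NumPy (values illustrative):
import numpy as np
w = np.array([-1.0, -0.3, 0.2, 1.0], dtype=np.float32)
reg = np.sum(1 - np.square(w))             # only -0.3 and 0.2 contribute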
Example #53
def create_model(input_sequence, label_sequence, vocab_dim, hidden_dim):
    # Create the rnn that computes the latent representation for the next token.
    rnn_with_latent_output = Sequential([
        C.Embedding(hidden_dim),
        For(range(num_layers), lambda:
            Sequential([Stabilizer(), Recurrence(LSTM(hidden_dim), go_backwards=False)])),
        ])

    # Apply it to the input sequence.
    latent_vector = rnn_with_latent_output(input_sequence)

    # Connect the latent output to (sampled/full) softmax.
    if use_sampled_softmax:
        weights = load_sampling_weights(token_frequencies_file_path)
        smoothed_weights = np.float32( np.power(weights, alpha))
        sampling_weights = C.reshape(C.Constant(smoothed_weights), shape = (1,vocab_dim))
        z, ce, errs = cross_entropy_with_sampled_softmax(latent_vector, label_sequence, vocab_dim, hidden_dim, softmax_sample_size, sampling_weights)
    else:
        z, ce, errs = cross_entropy_with_full_softmax(latent_vector, label_sequence, vocab_dim, hidden_dim)

    return z, ce, errs
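# The sampled-softmax branch smooths unigram frequencies with an exponent
# alpha before sampling (alpha ~ 0.75 is a common choice; in the snippet it
# comes from module scope). A NumPy sketch of just the smoothing step, with
# assumed values:
import numpy as np
alpha = 0.75                               # assumed value
freqs = np.array([0.5, 0.3, 0.2], dtype=np.float32)
smoothed = np.float32(np.power(freqs, alpha))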
Example #54
def test_cntk_conv2d():
    try:
        import tensorflow
        has_tensorflow = True
    except ImportError:
        has_tensorflow = False

    if has_tensorflow:
        tf_baseline_conv2d()
    else:
        cntk_baseline_conv2d()

    import cntk as C
    import cntk.contrib.crosstalk.crosstalk_cntk as crct
    ci = crct.instance

    input_var = C.sequence.input_variable(shape=sample_shape)
    input_reshaped = C.reshape(input_var, (1,)+sample_shape)
    conv_out = C.layers.Convolution2D(filter_shape, num_filters, activation=None)(input_reshaped)

    ci.watch(conv_out, 'conv2d', var_type=cstk.Conv2DAttr,
              attr=cstk.Conv2DAttr(filter_shape=filter_shape, num_filters=num_filters))
    ci.watch(conv_out, 'conv2d_out')

    data = {input_var:input_data}
    ci.set_data(data)
    ci.set_workdir(workdir)
    conv_out_values = conv_out.eval(data)

    # load parameters from crosstalk and verify results are the same
    ci.assign('conv2d', load=True)
    assert ci.compare('conv2d_out', rtol=1e-4, atol=1e-6)

    # test assign with value
    ci.assign('conv2d', value=cstk.Conv2DArgs(W=np.random.random((num_filters,) + filter_shape).astype(np.float32),
                                              b=np.random.random((num_filters,)).astype(np.float32)))

    ci.reset()
Example #55
def test_Reshape(tmpdir, dtype):
    with C.default_options(dtype = dtype):
        data = np.asarray([[[[0., 1.],[2., 3.],[4., 5.]]]], dtype=dtype)
        i1 = C.input_variable(shape=(3,2))
        model = C.reshape(i1, (2,3))
        verify_one_input(model, data, tmpdir, 'Reshape_1')
Example #56
def test_Reshape(tmpdir):
    data = np.asarray([[[[0., 1.],[2., 3.],[4., 5.]]]], dtype=np.float32)
    i1 = C.input_variable(shape=(3,2))
    model = C.reshape(i1, (2,3))
    verify_one_input(model, data, tmpdir, 'Reshape_1')
Example #57
def create_rpn(conv_out, scaled_gt_boxes, im_info, add_loss_functions=True,
               proposal_layer_param_string=None, conv_bias_init=0.0):
    '''
    Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").

    Args:
        conv_out:        The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        im_info:         A CNTK variable or constant containing
                         (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
                         e.g. (1000, 1000, 1000, 600, 500, 300) for an original image of 500x300 that is scaled and padded to 1000x1000
        add_loss_functions: If set to True, rpn_losses will be returned; otherwise None is returned for the losses
        proposal_layer_param_string: A yaml parameter string that is passed to the proposal layer.

    Returns:
        rpn_rois - the proposed ROIs
        rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness)
    '''

    # RPN network
    # init = 'normal', initValueScale = 0.01, initBias = 0.1
    num_channels = cfg["CNTK"].RPN_NUM_CHANNELS
    rpn_conv_3x3 = Convolution((3, 3), num_channels, activation=relu, pad=True, strides=1,
                                init = normal(scale=0.01), init_bias=conv_bias_init)(conv_out)
    rpn_cls_score = Convolution((1, 1), 18, activation=None, name="rpn_cls_score",
                                init = normal(scale=0.01), init_bias=conv_bias_init)(rpn_conv_3x3)  # 2(bg/fg)  * 9(anchors)
    rpn_bbox_pred = Convolution((1, 1), 36, activation=None, name="rpn_bbox_pred",
                                init = normal(scale=0.01), init_bias=conv_bias_init)(rpn_conv_3x3)  # 4(coords) * 9(anchors)

    # apply softmax to get (bg, fg) probabilities and reshape predictions back to grid of (18, H, W)
    num_predictions = int(rpn_cls_score.shape[0] / 2)
    rpn_cls_score_rshp = reshape(rpn_cls_score, (2, num_predictions, rpn_cls_score.shape[1], rpn_cls_score.shape[2]), name="rpn_cls_score_rshp")
    p_rpn_cls_score_rshp = cntk.placeholder()
    rpn_cls_sm = softmax(p_rpn_cls_score_rshp, axis=0)
    rpn_cls_prob = cntk.as_block(rpn_cls_sm, [(p_rpn_cls_score_rshp, rpn_cls_score_rshp)], 'Softmax', 'rpn_cls_prob')
    rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape, name="rpn_cls_prob_reshape")

    # proposal layer
    rpn_rois_raw = user_function(ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, param_str=proposal_layer_param_string))
    rpn_rois = alias(rpn_rois_raw, name='rpn_rois')

    rpn_losses = None
    if(add_loss_functions):
        # RPN targets
        # Comment: rpn_cls_score is only passed   vvv   to get width and height of the conv feature map ...
        atl = user_function(AnchorTargetLayer(rpn_cls_score, scaled_gt_boxes, im_info, param_str=proposal_layer_param_string))
        rpn_labels = atl.outputs[0]
        rpn_bbox_targets = atl.outputs[1]
        rpn_bbox_inside_weights = atl.outputs[2]

        # classification loss
        p_rpn_labels = cntk.placeholder()
        p_rpn_cls_score_rshp = cntk.placeholder()

        keeps = cntk.greater_equal(p_rpn_labels, 0.0)
        fg_labels = element_times(p_rpn_labels, keeps, name="fg_targets")
        bg_labels = minus(1, fg_labels, name="bg_targets")
        rpn_labels_ignore = splice(bg_labels, fg_labels, axis=0)
        rpn_ce = cross_entropy_with_softmax(p_rpn_cls_score_rshp, rpn_labels_ignore, axis=0)
        rpn_loss_cls = element_times(rpn_ce, keeps)

        # The terms that are accounted for in the cls loss are those that have a label >= 0
        cls_num_terms = reduce_sum(keeps)
        cls_normalization_factor = 1.0 / cls_num_terms
        normalized_rpn_cls_loss = reduce_sum(rpn_loss_cls) * cls_normalization_factor

        reduced_rpn_loss_cls = cntk.as_block(normalized_rpn_cls_loss,
                                         [(p_rpn_labels, rpn_labels), (p_rpn_cls_score_rshp, rpn_cls_score_rshp)],
                                         'CE_with_ignore', 'norm_rpn_cls_loss')

        # regression loss
        p_rpn_bbox_pred = cntk.placeholder()
        p_rpn_bbox_targets = cntk.placeholder()
        p_rpn_bbox_inside_weights = cntk.placeholder()
        rpn_loss_bbox = SmoothL1Loss(cfg["CNTK"].SIGMA_RPN_L1, p_rpn_bbox_pred, p_rpn_bbox_targets, p_rpn_bbox_inside_weights, 1.0)
        # The bbox loss is normalized by the rpn batch size
        bbox_normalization_factor = 1.0 / cfg["TRAIN"].RPN_BATCHSIZE
        normalized_rpn_bbox_loss = reduce_sum(rpn_loss_bbox) * bbox_normalization_factor

        reduced_rpn_loss_bbox = cntk.as_block(normalized_rpn_bbox_loss,
                                          [(p_rpn_bbox_pred, rpn_bbox_pred), (p_rpn_bbox_targets, rpn_bbox_targets),
                                           (p_rpn_bbox_inside_weights, rpn_bbox_inside_weights)],
                                          'SmoothL1Loss', 'norm_rpn_bbox_loss')

        rpn_losses = plus(reduced_rpn_loss_cls, reduced_rpn_loss_bbox, name="rpn_losses")

    return rpn_rois, rpn_losses
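# The objectness softmax above operates on a (2, 9, H, W) view of the
# (18, H, W) score map: axis 0 holds the bg/fg pair for each of the 9
# anchors. A NumPy analogue (H, W illustrative):
import numpy as np
H, W = 4, 4
scores = np.random.randn(18, H, W).astype(np.float32)
rshp = scores.reshape(2, 9, H, W)                       # (bg/fg, anchors, H, W)
e = np.exp(rshp - rshp.max(axis=0, keepdims=True))
probs = (e / e.sum(axis=0, keepdims=True)).reshape(18, H, W)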
Example #58
def hierarchical_softmax_layer_for_sequence(input_var, num_output_classes, target_class, target_output_in_class, batch_size, w1, b1, w2s, b2s):
    '''
    A two layers hierarchical softmax function with sequence axis input:

    Example:
        >>> input_dim = 2
        >>> num_output_classes = 4
        >>> minibatch_size = 3
        >>> seq_size = 5
        >>> n_classes = int(math.ceil(math.sqrt(num_output_classes)))
        >>> n_outputs_per_class = n_classes

        >>> w1 = C.parameter(shape=(input_dim, n_classes), init=C.glorot_normal(seed=2), name='w1')
        >>> b1 = C.parameter(shape=(n_classes), init=C.glorot_normal(seed=3), name='b1')
        >>> w2s = C.parameter(shape=(n_classes, input_dim, n_outputs_per_class), init=C.glorot_normal(seed=4), name='w2s')
        >>> b2s = C.parameter(shape=(n_classes, n_outputs_per_class), init=C.glorot_normal(seed=5), name='b2s')

        # neural network structure for hierarchical softmax
        >>> h_input = C.sequence.input_variable(input_dim)
        >>> h_target_class = C.sequence.input_variable([1])
        >>> h_target_output_in_class = C.sequence.input_variable([1])
        >>> h_z, class_probs, all_probs = hierarchical_softmax_layer_for_sequence(h_input, num_output_classes, h_target_class, h_target_output_in_class, minibatch_size, w1, b1, w2s, b2s)

        >>> a = np.reshape(np.arange(seq_size * minibatch_size * input_dim, dtype = np.float32), (seq_size, minibatch_size, input_dim))
        >>> labels = np.reshape(np.arange(seq_size * minibatch_size, dtype = np.float32), (seq_size, minibatch_size, 1)) % num_output_classes
        >>> target_labels = labels // n_outputs_per_class
        >>> target_output_in_labels = labels % n_outputs_per_class
        >>> h_z.eval({h_input: a, h_target_class: target_labels, h_target_output_in_class: target_output_in_labels})[1]
        array([[ 0.000859],
               [ 0.      ],
               [ 0.      ]], dtype=float32)

    Args:
        input_var: class:`~cntk.ops.functions.Function` that outputs a tensor with sequence axis and batch axis
        num_output_classes: int
        target_class: class:`~cntk.ops.functions.Function` that outputs a tensor with sequence axis and batch axis
        target_output_in_class: class:`~cntk.ops.functions.Function` that outputs a tensor with sequence axis and batch axis
        batch_size: int
        w1: C.parameter
        b1: C.parameter
        w2s: C.parameter
        b2s: C.parameter
    Returns:
        output_prob: class:`~cntk.ops.functions.Function`
        class_probs: class:`~cntk.ops.functions.Function`
        all_probs: a list of class:`~cntk.ops.functions.Function`
    '''
    input_dim = input_var.shape[0]

    n_classes = int(math.ceil(math.sqrt(num_output_classes)))
    n_outputs_per_class = n_classes

    class_probs = C.softmax(b1 + C.times(input_var, w1))

    w2_temp = C.gather(w2s, target_class)
    w2 = reshape(w2_temp, (input_dim, n_outputs_per_class))
    w2 = C.sequence.broadcast_as(w2, input_var)
    b2 = reshape(C.gather(b2s, target_class), (n_outputs_per_class))
    b2 = C.sequence.broadcast_as(b2, input_var)

    times_result = times(input_var, w2)
    probs_in_class = softmax(b2 + times_result)
    probs_in_class = C.sequence.broadcast_as(probs_in_class, target_output_in_class)
    target_output_in_class = C.one_hot(target_output_in_class, n_outputs_per_class, False)
    probs_in_class = C.sequence.broadcast_as(probs_in_class, target_output_in_class)
    prob_in_class = C.times_transpose(probs_in_class, target_output_in_class)
    target_class = C.one_hot(target_class, n_classes, False)
    class_probs = C.sequence.broadcast_as(class_probs, target_class)
    class_prob = C.times_transpose(class_probs, target_class)

    output_prob = C.element_times(class_prob, prob_in_class)

    # this is for calculating all the outputs' probabilities
    all_probs = []
    for i in range(n_classes):
        ci = C.constant(i)
        w2a = C.reshape(C.gather(w2s, ci), (input_dim, n_outputs_per_class))
        w2a = C.sequence.broadcast_as(w2a, input_var)
        b2a = C.reshape(C.gather(b2s, ci), (n_outputs_per_class))
        b2a = C.sequence.broadcast_as(b2a, input_var)

        probs_in_classa = C.softmax(b2a + times(input_var, w2a))
        cia = C.constant(i, shape=[1])
        cia = C.reconcile_dynamic_axes(cia, class_probs)
        cia = C.one_hot(cia, n_outputs_per_class, False)
        class_proba = C.times_transpose(class_probs, cia)
        class_proba = C.sequence.broadcast_as(class_proba, probs_in_classa)

        output_proba = C.element_times(class_proba, probs_in_classa)
        all_probs.append(output_proba)

    return output_prob, class_probs, all_probs
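# The two-level factorization behind the layer above:
# p(w | x) = p(class(w) | x) * p(w | class(w), x). A NumPy sketch with 2
# classes of 2 outputs each (probabilities illustrative):
import numpy as np
class_probs = np.array([0.6, 0.4])                 # p(class | x)
in_class = np.array([[0.7, 0.3], [0.5, 0.5]])      # p(w | class, x)
c, j = 1, 0                                        # target class, index in class
p_w = class_probs[c] * in_class[c, j]              # 0.4 * 0.5 = 0.2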
Example #59
def create_rpn(conv_out, scaled_gt_boxes, im_info, add_loss_functions=True,
               proposal_layer_param_string=None):
    '''
    Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").

    Args:
        conv_out:        The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        im_info:         (image_width, image_height, image_scale) as CNTK variable or constant
        add_loss_functions: If set to True, rpn_losses will be returned; otherwise None is returned for the losses
        proposal_layer_param_string: A yaml parameter string that is passed to the proposal layer.

    Returns:
        rpn_rois - the proposed ROIs
        rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness)
    '''

    # RPN network
    # init = 'normal', initValueScale = 0.01, initBias = 0.1
    rpn_conv_3x3 = Convolution((3, 3), 256, activation=relu, pad=True, strides=1,
                                init = normal(scale=0.01), init_bias=0.1)(conv_out)
    rpn_cls_score = Convolution((1, 1), 18, activation=None, name="rpn_cls_score",
                                init = normal(scale=0.01), init_bias=0.1)(rpn_conv_3x3)  # 2(bg/fg)  * 9(anchors)
    rpn_bbox_pred = Convolution((1, 1), 36, activation=None, name="rpn_bbox_pred",
                                init = normal(scale=0.01), init_bias=0.1)(rpn_conv_3x3)  # 4(coords) * 9(anchors)

    # apply softmax to get (bg, fg) probabilities and reshape predictions back to grid of (18, H, W)
    num_predictions = int(np.prod(rpn_cls_score.shape) / 2)
    rpn_cls_score_rshp = reshape(rpn_cls_score, (2, num_predictions))
    rpn_cls_prob = softmax(rpn_cls_score_rshp, axis=0, name="objness_softmax")
    rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape)

    # proposal layer
    rpn_rois_raw = user_function(ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, param_str=proposal_layer_param_string))
    rpn_rois = alias(rpn_rois_raw, name='rpn_rois')

    rpn_losses = None
    if(add_loss_functions):
        # RPN targets
        # Comment: rpn_cls_score is only passed   vvv   to get width and height of the conv feature map ...
        atl = user_function(AnchorTargetLayer(rpn_cls_score, scaled_gt_boxes, im_info, param_str=proposal_layer_param_string))
        rpn_labels = atl.outputs[0]
        rpn_bbox_targets = atl.outputs[1]
        rpn_bbox_inside_weights = atl.outputs[2]

        # For loss functions: ignore label predictions for the 'ignore label',
        # i.e. set target and prediction to 0 --> needs to be softmaxed before
        rpn_labels_rshp = reshape(rpn_labels, (1, num_predictions))
        ignore = user_function(IgnoreLabel(rpn_cls_prob, rpn_labels_rshp, ignore_label=-1))
        rpn_cls_prob_ignore = ignore.outputs[0]
        fg_targets = ignore.outputs[1]
        bg_targets = 1 - fg_targets
        rpn_labels_ignore = splice(bg_targets, fg_targets, axis=0)

        # RPN losses
        rpn_loss_cls = cross_entropy_with_softmax(rpn_cls_prob_ignore, rpn_labels_ignore, axis=0)
        rpn_loss_bbox = user_function(SmoothL1Loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights))
        rpn_losses = plus(reduce_sum(rpn_loss_cls), reduce_sum(rpn_loss_bbox), name="rpn_losses")

    return rpn_rois, rpn_losses