Example No. 1
def create_detection_losses(cls_score, label_targets, rois, bbox_pred, bbox_targets, bbox_inside_weights):
    # classification loss
    cls_loss = cross_entropy_with_softmax(cls_score, label_targets, axis=1)

    p_cls_loss = placeholder()
    p_rois = placeholder()
    # The terms that are accounted for in the cls loss are those that correspond to an actual roi proposal --> do not count no-op (all-zero) rois
    roi_indicator = reduce_sum(p_rois, axis=1)
    cls_num_terms = reduce_sum(cntk.greater_equal(roi_indicator, 0.0))
    cls_normalization_factor = 1.0 / cls_num_terms
    normalized_cls_loss = reduce_sum(p_cls_loss) * cls_normalization_factor

    reduced_cls_loss = cntk.as_block(normalized_cls_loss,
                                     [(p_cls_loss, cls_loss), (p_rois, rois)],
                                     'Normalize', 'norm_cls_loss')

    # regression loss
    p_bbox_pred = placeholder()
    p_bbox_targets = placeholder()
    p_bbox_inside_weights = placeholder()
    bbox_loss = SmoothL1Loss(cfg["CNTK"].SIGMA_DET_L1, p_bbox_pred, p_bbox_targets, p_bbox_inside_weights, 1.0)
    # The bbox loss is normalized by the batch size
    bbox_normalization_factor = 1.0 / cfg["TRAIN"].BATCH_SIZE
    normalized_bbox_loss = reduce_sum(bbox_loss) * bbox_normalization_factor

    reduced_bbox_loss = cntk.as_block(normalized_bbox_loss,
                                     [(p_bbox_pred, bbox_pred), (p_bbox_targets, bbox_targets), (p_bbox_inside_weights, bbox_inside_weights)],
                                     'SmoothL1Loss', 'norm_bbox_loss')

    detection_losses = plus(reduced_cls_loss, reduced_bbox_loss, name="detection_losses")

    return detection_losses
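The example above builds its losses over free placeholders and then binds them with cntk.as_block so each normalized loss appears as a single named node; the same pattern recurs in the detection examples below. A minimal, self-contained sketch of the placeholder-plus-as_block idiom (the block names 'Scale'/'scale_block' are made up for illustration):

import cntk as C
import numpy as np

x = C.input_variable(3)
p = C.placeholder()                    # free placeholder the sub-graph is built over
scaled = p * 2.0                       # the sub-graph itself
block = C.as_block(scaled, [(p, x)],   # bind placeholder -> actual input
                   'Scale', 'scale_block')
print(block.eval({x: np.array([[1.0, 2.0, 3.0]], dtype=np.float32)}))  # [[2. 4. 6.]]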
Example No. 3
def multi_headed_self_attention_layer(in_dims: int,
                                      hidden_dims: int,
                                      num_of_head: int,
                                      name='multi_headed_self_attention',
                                      as_block: bool = False,
                                      k_ph: bool = False,
                                      v_ph: bool = False,
                                      mask_opt: bool = False) -> C.Function:
    X = C.placeholder(
        in_dims, (C.Axis.default_batch_axis(), C.Axis.default_dynamic_axis()),
        name=name + '_ph')

    outputs = []

    if k_ph is False and v_ph is False:
        for i in range(num_of_head):
            layer = self_attention_layer(in_dims,
                                         hidden_dims,
                                         name=name + str(i),
                                         as_block=not as_block,
                                         mask_opt=mask_opt)
            outputs.append(layer(X))
    elif k_ph is True and v_ph is True:
        k_ = C.placeholder(in_dims,
                           (C.Axis.default_batch_axis(), C.Axis('kv_seq')),
                           name=name + '_k_ph')  # -3: sequence axis
        v_ = C.placeholder(in_dims,
                           (C.Axis.default_batch_axis(), C.Axis('kv_seq')),
                           name=name + '_v_ph')
        for i in range(num_of_head):
            layer = self_attention_layer(in_dims,
                                         in_dims,
                                         name=name + str(i),
                                         as_block=not as_block,
                                         k_ph=k_ph,
                                         v_ph=v_ph)
            outputs.append(layer(X, k_, v_))
    else:
        raise Exception(f'k_ph:{k_ph}, v_ph:{v_ph}')

    concat = C.splice(*outputs, name='concat')

    result = C.layers.Dense(in_dims, name='W_o')(concat)

    # init = C.initializer.normal(1)
    # W_O = C.parameter((in_dims, hidden_dims*num_of_head), init=init, name=name+'_Wo')
    # result = C.times_transpose(concat, W_O, name='result')

    if as_block is True:
        if k_ph is False and v_ph is False:
            result = C.as_block(result, [(X, X)], 'multi_headed_self_attention',
                                'multi_headed_self_attention_')
        elif k_ph is True and v_ph is True:
            result = C.as_block(result, [(X, X), (k_, k_), (v_, v_)],
                                'multi_headed_self_attention',
                                'multi_headed_self_attention_')
        else:
            raise Exception(f'k_ph:{k_ph} v_ph:{v_ph}')

    return result
Example No. 4
def create_detection_losses(cls_score, label_targets, bbox_pred, rois, bbox_targets, bbox_inside_weights, cfg):
    # The losses are normalized by the batch size
    # classification loss
    p_cls_score = placeholder()
    p_label_targets = placeholder()
    cls_loss = cross_entropy_with_softmax(p_cls_score, p_label_targets, axis=1)
    cls_normalization_factor = 1.0 / cfg.NUM_ROI_PROPOSALS
    normalized_cls_loss = reduce_sum(cls_loss) * cls_normalization_factor

    reduced_cls_loss = cntk.as_block(normalized_cls_loss,
                                     [(p_cls_score, cls_score), (p_label_targets, label_targets)],
                                     'CrossEntropyWithSoftmax', 'norm_cls_loss')

    # regression loss
    p_bbox_pred = placeholder()
    p_bbox_targets = placeholder()
    p_bbox_inside_weights = placeholder()
    bbox_loss = SmoothL1Loss(cfg.SIGMA_DET_L1, p_bbox_pred, p_bbox_targets, p_bbox_inside_weights, 1.0)
    bbox_normalization_factor = 1.0 / cfg.NUM_ROI_PROPOSALS
    normalized_bbox_loss = reduce_sum(bbox_loss) * bbox_normalization_factor

    reduced_bbox_loss = cntk.as_block(normalized_bbox_loss,
                                     [(p_bbox_pred, bbox_pred), (p_bbox_targets, bbox_targets), (p_bbox_inside_weights, bbox_inside_weights)],
                                     'SmoothL1Loss', 'norm_bbox_loss')

    detection_losses = plus(reduced_cls_loss, reduced_bbox_loss, name="detection_losses")

    return detection_losses
Example No. 6
    def simi_attention(self, input, memory):
        '''
        return:
        memory weighted vectors over input [#,c][d]
        weight
        '''
        input_ph = C.placeholder()  # [#,c][d]
        mem_ph = C.placeholder()  # [#,q][d]

        input_dense = Dense(2 * self.hidden_dim, bias=False, input_rank=1)
        mem_dense = Dense(2 * self.hidden_dim, bias=False, input_rank=1)
        bias = C.Parameter(shape=(2 * self.hidden_dim, ), init=0.0)
        weight_dense = Dense(1, bias=False, input_rank=1)

        proj_inp = input_dense(input_ph)  # [#,c][d]
        proj_mem = mem_dense(mem_ph)  # [#,q][d]
        unpack_memory, mem_mask = C.sequence.unpack(
            proj_mem, 0).outputs  # [#][*=q, d] [#][*=q]
        expand_mem = C.sequence.broadcast_as(unpack_memory,
                                             proj_inp)  # [#,c][*=q,d]
        expand_mask = C.sequence.broadcast_as(mem_mask, proj_inp)  # [#,c][*=q]
        matrix = C.reshape(weight_dense(C.tanh(proj_inp + expand_mem + bias)),
                           (-1, ))  # [#,c][*=q]
        matrix = C.element_select(expand_mask, matrix, -1e30)
        logits = C.softmax(matrix, axis=0)  # [#,c][*=q]
        weight_mem = C.reduce_sum(C.reshape(logits, (-1, 1)) * expand_mem,
                                  axis=0)  # [#,c][d]
        weight_mem = C.reshape(weight_mem, (-1, ))

        return C.as_block(C.combine(weight_mem, logits), [(input_ph, input),
                                                          (mem_ph, memory)],
                          'simi_attention', 'simi_attention')
Example No. 7
def triangular_matrix_seq(mode: int = 1):
    X = C.placeholder(1)
    ones = C.ones_like(X[0])
    perm_1 = C.layers.Recurrence(C.plus, return_full_state=True)(ones)
    perm_2 = C.layers.Recurrence(C.plus,
                                 go_backwards=True,
                                 return_full_state=True)(ones)

    arr_1 = C.sequence.unpack(perm_1, 0, True)
    arr_2 = C.sequence.unpack(perm_2, 0, True)

    mat = C.times_transpose(arr_1, arr_2)
    mat_c = arr_1 * arr_2

    diagonal_mat = mat - mat_c

    final_mat = diagonal_mat
    if mode == 0:
        final_mat = C.equal(final_mat, 0)
    elif mode == 1:
        final_mat = C.less_equal(final_mat, 0)
    elif mode == 2:
        final_mat = C.less(final_mat, 0)
    elif mode == -1:
        final_mat = C.greater_equal(final_mat, 0)
    elif mode == -2:
        final_mat = C.greater(final_mat, 0)

    result = C.as_block(final_mat, [(X, X)], 'triangular_matrix')

    return C.stop_gradient(result)
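A rough usage sketch (assuming cntk is imported as C and numpy as np): the returned block is applied to a sequence input, and the per-step counters are unpacked into a square 0/1 comparison matrix whose side equals the sequence length.

x = C.sequence.input_variable(1)
mask = triangular_matrix_seq(mode=1)(x)                   # bind the internal placeholder to a real sequence
print(mask.eval({x: np.ones((4, 1), dtype=np.float32)}))  # 4x4 triangular 0/1 mask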
Example No. 8
    def input_layer(self,cgw,cnw,cc,qgw,qnw,qc):
        cgw_ph = C.placeholder()
        cnw_ph = C.placeholder()
        cc_ph  = C.placeholder()
        qgw_ph = C.placeholder()
        qnw_ph = C.placeholder()
        qc_ph  = C.placeholder()

        input_chars = C.placeholder(shape=(1,self.word_size,self.c_dim))
        input_glove_words = C.placeholder(shape=(self.wg_dim,))
        input_nonglove_words = C.placeholder(shape=(self.wn_dim,))

        # we need to reshape because GlobalMaxPooling/reduce_max is retaining a trailing singleton dimension
        # todo GlobalPooling/reduce_max should have a keepdims default to False
        embedded = C.splice(
            C.reshape(self.charcnn(input_chars), self.convs),
            self.embed()(input_glove_words, input_nonglove_words), name='splice_embed')
        processed = C.layers.Sequential([For(range(2), lambda: OptimizedRnnStack(self.hidden_dim, bidirectional=True, use_cudnn=self.use_cudnn, name='input_rnn'))])(embedded)
        
        qce = C.one_hot(qc_ph, num_classes=self.c_dim, sparse_output=self.use_sparse)
        cce = C.one_hot(cc_ph, num_classes=self.c_dim, sparse_output=self.use_sparse)
        
        q_processed = processed.clone(C.CloneMethod.share, {input_chars:qce, input_glove_words:qgw_ph, input_nonglove_words:qnw_ph})
        c_processed = processed.clone(C.CloneMethod.share, {input_chars:cce, input_glove_words:cgw_ph, input_nonglove_words:cnw_ph})
        return C.as_block(
            C.combine([c_processed, q_processed]),
            [(cgw_ph, cgw),(cnw_ph, cnw),(cc_ph, cc),(qgw_ph, qgw),(qnw_ph, qnw),(qc_ph, qc)],
            'input_layer',
            'input_layer')
Example No. 9
    def output_layer(self, attention_context, modeling_context):
        att_context = C.placeholder()
        mod_context = C.placeholder()
        #output layer [#,c][1]
        start_logits = C.layers.Dense(1, name='out_start')(C.dropout(
            C.splice(mod_context, att_context), self.dropout))
        start_logits = C.sequence.softmax(start_logits)
        start_hardmax = seq_hardmax(start_logits)  # [000010000]
        att_mod_ctx = C.sequence.last(
            C.sequence.gather(mod_context, start_hardmax))  # [#][2*hidden_dim]
        att_mod_ctx_expanded = C.sequence.broadcast_as(att_mod_ctx,
                                                       att_context)
        end_input = C.splice(att_context, mod_context, att_mod_ctx_expanded,
                             mod_context *
                             att_mod_ctx_expanded)  # [#, c][14*hidden_dim]
        m2 = OptimizedRnnStack(self.hidden_dim,
                               bidirectional=True,
                               use_cudnn=self.use_cudnn,
                               name='output_rnn')(end_input)
        end_logits = C.layers.Dense(1, name='out_end')(C.dropout(
            C.splice(m2, att_context), self.dropout))
        end_logits = C.sequence.softmax(end_logits)

        return C.as_block(C.combine([start_logits, end_logits]),
                          [(att_context, attention_context),
                           (mod_context, modeling_context)], 'output_layer',
                          'output_layer')
Example No. 10
def test_block_with_unused_outputs():
    p1 = C.placeholder()
    p3 = C.placeholder()
    func1 = C.as_block(p1 + 1, [(p1, p3)], 'plus_func_1')
    p2 = C.placeholder()
    p4 = C.placeholder()
    func2 = C.as_block(p2 + 1, [(p2, p4)], 'plus_func_2')
    p5 = C.placeholder()
    func3 = C.as_block(C.combine([func2]), [(p4, p5)], 'empty_block')
    input_var1 = C.input_variable(shape=())
    input_var2 = C.input_variable(shape=())
    block = C.as_block(C.combine([func1, func3]), [(p3, input_var1), (p5, input_var2)], 'multi_output_block')

    eval_root = C.combine([block.outputs[0]])
    result = eval_root.eval({input_var1 : np.asarray([3], dtype=np.float32), input_var2 : np.asarray([-3], dtype=np.float32)})
    assert np.array_equal(result, [ 4.])
Example No. 11
    def gated_attention_gru_layer(self, context, query):
        q_processed = C.placeholder(shape=(2*self.hidden_dim,))
        c_processed = C.placeholder(shape=(2*self.hidden_dim,))

        #gate weight
        Wg = C.parameter(shape=(4*self.hidden_dim, 4*self.hidden_dim))
        att_gru = C.layers.GRU(2*self.hidden_dim)
        attention_model = C.layers.AttentionModel(self.hidden_dim, name='attention_model')
        
        @C.Function
        def out_func0(att_input, enc_input):
            enc_input2 = enc_input
            @C.Function
            def gru_with_attention(dh, x):
                c_att = attention_model(att_input, x)
                x = C.splice(x, c_att)
                x = C.element_times(x, C.sigmoid(C.times(x, Wg)))
                return att_gru(dh, x)
            att_context = Recurrence(gru_with_attention)(enc_input2)
            return att_context
        att_context = out_func0(q_processed, c_processed)
        return C.as_block(
            att_context,
            [(c_processed, context), (q_processed, query)],
            'gated_attention_gru_layer',
            'gated_attention_gru_layer')
Example No. 12
    def scale_dot_product_attention_block(self, contextQ, contextV, contextK,
                                          name):

        Q = C.placeholder(shape=(2 * self.hidden_dim, ),
                          dynamic_axes=[self.b_axis, self.q_axis])
        V = C.placeholder(shape=(2 * self.hidden_dim, ),
                          dynamic_axes=[self.b_axis, self.q_axis])
        K = C.placeholder(shape=(2 * self.hidden_dim, ),
                          dynamic_axes=[self.b_axis, self.q_axis])

        Ql = C.layers.Dense(100)(Q)
        Vl = C.layers.Dense(100)(V)
        Kl = C.layers.Dense(100)(K)

        kvw, kvw_mask = C.sequence.unpack(Kl, padding_value=0).outputs
        vvw, _ = C.sequence.unpack(Vl, padding_value=0).outputs
        KT = C.swapaxes(kvw)

        S = C.reshape(C.times(Ql, KT) / math.sqrt(100), -1)
        kvw_mask_expanded = C.sequence.broadcast_as(kvw_mask, Ql)
        S = C.softmax(
            C.element_select(kvw_mask_expanded, S, C.constant(-1e+30)))
        att = C.times(S, vvw)

        return C.as_block(att, [(Q, contextQ), (V, contextV),
                                (K, contextK)], 'sdp_attention_block' + name,
                          'sdp_attention_block' + name)
Example No. 13
        def _func(x):
            input_ph = C.placeholder()

            ph = C.placeholder()
            onehot_value = C.one_hot(ph,262)
            x1 = C.times(onehot_value, self.char_embed) # [#,*][50,16]
            # x2 = self.convs[0](x1) # [#,*][32,50,1]
            convs_res = []
            for i in range(self.filter_num):
                conv_res = self.convs[i](x1)
                convs_res.append(C.reshape(C.reduce_max(conv_res, axis=1),(-1,)))
            token_embed = C.splice(*convs_res) # [#,*][2048]
            
            tmp_res = token_embed
            for i in range(self.highway_num):
                tmp_res = self.highways[i](tmp_res)
            highway_out=tmp_res # [#,*][2048]
            proj_out = self.proj(highway_out) # [#,*][512]

            if not require_train:
                res = proj_out.clone(C.CloneMethod.freeze, {ph:input_ph})
            else:
                res = proj_out.clone(C.CloneMethod.clone, {ph:input_ph})
            return C.as_block(
                res,[(input_ph, x)], 'elmo_char_encoder', 'elmo_char_encoder'
            )
Example No. 14
 def matching_attention_layer(self, attention_context):
     att_context = C.placeholder(shape=(2*self.hidden_dim,))
     #matching layer
     matching_model = C.layers.AttentionModel(attention_dim=self.hidden_dim, name='attention_model')
     #gate weight
     Wg = C.parameter(shape=(2*self.hidden_dim, 2*self.hidden_dim))
     #gru
     att_gru = C.layers.GRU(self.hidden_dim)
     @C.Function
     def out_func1(att_input, enc_input):
         enc_input2 = enc_input
         @C.Function
         def bigru_with_match(dh, x):
             c_att = matching_model(att_input, dh)
             x = C.splice(x, c_att)
             x = C.element_times(x, C.sigmoid(C.times(x, Wg)))
             return att_gru(dh, x)
         return C.splice(C.layers.Recurrence(bigru_with_match)(enc_input2),
                     C.layers.Recurrence(bigru_with_match, go_backwards=True)(enc_input2),
                     name="bigru_with_match")
     match_context = out_func1(att_context, att_context)
     return C.as_block(
         match_context,
         [(att_context, attention_context)],
         'matching_attention_layer',
         'matching_attention_layer')
Example No. 16
    def output_layer(self, attention_context, modeling_context):
        att_context = C.placeholder(shape=(8 * self.hidden_dim, ))
        mod_context = C.placeholder(shape=(2 * self.hidden_dim, ))
        #output layer
        start_logits = C.layers.Dense(1, name='out_start')(C.dropout(
            C.splice(mod_context, att_context), self.dropout))
        if self.two_step:
            start_hardmax = seq_hardmax(start_logits)
            att_mod_ctx = C.sequence.last(
                C.sequence.gather(mod_context, start_hardmax))
        else:
            start_prob = C.softmax(start_logits)
            att_mod_ctx = C.sequence.reduce_sum(mod_context * start_prob)
        att_mod_ctx_expanded = C.sequence.broadcast_as(att_mod_ctx,
                                                       att_context)
        end_input = C.splice(att_context, mod_context, att_mod_ctx_expanded,
                             mod_context * att_mod_ctx_expanded)
        m2 = OptimizedRnnStack(self.hidden_dim,
                               bidirectional=True,
                               use_cudnn=self.use_cudnn,
                               name='output_rnn')(end_input)
        end_logits = C.layers.Dense(1, name='out_end')(C.dropout(
            C.splice(m2, att_context), self.dropout))

        return C.as_block(C.combine([start_logits, end_logits]),
                          [(att_context, attention_context),
                           (mod_context, modeling_context)], 'output_layer',
                          'output_layer')
Example No. 17
    def input_layer(self, cgw, cnw, qgw, qnw):
        cgw_ph = C.placeholder()
        cnw_ph = C.placeholder()
        qgw_ph = C.placeholder()
        qnw_ph = C.placeholder()

        input_glove_words = C.placeholder(shape=(self.wg_dim, ))
        input_nonglove_words = C.placeholder(shape=(self.wn_dim, ))

        # we need to reshape because GlobalMaxPooling/reduce_max is retaining a trailing singleton dimension
        # todo GlobalPooling/reduce_max should have a keepdims default to False
        embedded = self.word_glove()(input_glove_words, input_nonglove_words)
        highway = HighwayNetwork(dim=self.word_emb_dim,
                                 highway_layers=self.highway_layers)(embedded)
        highway_drop = C.layers.Dropout(self.dropout)(highway)
        processed = OptimizedRnnStack(self.hidden_dim,
                                      bidirectional=True,
                                      use_cudnn=self.use_cudnn,
                                      name='input_rnn')(highway_drop)

        q_processed = processed.clone(C.CloneMethod.share, {
            input_glove_words: qgw_ph,
            input_nonglove_words: qnw_ph
        })
        c_processed = processed.clone(C.CloneMethod.share, {
            input_glove_words: cgw_ph,
            input_nonglove_words: cnw_ph
        })

        return C.as_block(C.combine([c_processed, q_processed]),
                          [(cgw_ph, cgw), (cnw_ph, cnw), (qgw_ph, qgw),
                           (qnw_ph, qnw)], 'input_layer', 'input_layer')
Example No. 19
def BinaryConvolution(operand,
                      filter_shape,
                      num_filters=1,
                      channels = 1,
                      init=C.glorot_uniform(),
                      pad=False,
                      strides=1,
                      bias=True,
                      init_bias=0,
                      op_name='BinaryConvolution', name=''):
    """ arguments:
            operand: tensor to convolve
            filter_shape: tuple indicating filter size
            num_filters: number of filters to use 
            channels: number of incoming channels
            init: type of initialization to use for weights
    """
    kernel_shape = (num_filters, channels) + filter_shape
    W = C.parameter(shape=kernel_shape, init=init, name="filter")

    binary_convolve_operand_p = C.placeholder(operand.shape, operand.dynamic_axes, name="operand")
    binary_convolve = C.convolution(CustomMultibit(W, 1), CustomMultibit(binary_convolve_operand_p, 1), auto_padding=[False, pad, pad], strides=[strides])
    r = C.as_block(binary_convolve, [(binary_convolve_operand_p, operand)], 'binary_convolve')

    bias_shape = (num_filters, 1, 1)
    b = C.parameter(shape=bias_shape, init=init_bias, name="bias")
    r = r + b

    # apply learnable param relu
    P = C.parameter(shape=r.shape, init=init, name="prelu")
    r = C.param_relu(P, r)
    return r
Example No. 20
 def modeling_layer(self, attention_context):
     att_context = C.placeholder(shape=(8 * self.hidden_dim, ))
     self._indrnn_builder._input_size = 8 * self.hidden_dim
     ind1 = [self._indrnn_builder.build(), self._indrnn_builder.build()]
     self._indrnn_builder._input_size = 2 * self.hidden_dim
     indrnns = [self._indrnn_builder.build() for _ in range(10)]
     indrnns = ind1 + indrnns
     #modeling layer 6 resnet layers
     model = C.layers.For(
         range(3),
         lambda i: C.layers.Sequential([
             #C.layers.ResNetBlock(
             C.layers.Sequential([
                 C.layers.LayerNormalization()
                 if self.use_layerbn else C.layers.identity,
                 C.layers.Dropout(self.dropout),
                 (C.layers.Recurrence(indrnns[4 * i]),
                  C.layers.Recurrence(indrnns[4 * i + 1], go_backwards=True)
                  ), C.splice,
                 C.layers.LayerNormalization()
                 if self.use_layerbn else C.layers.identity,
                 C.layers.Dropout(self.dropout),
                 (C.layers.Recurrence(indrnns[4 * i + 2]),
                  C.layers.Recurrence(indrnns[4 * i + 3], go_backwards=True)
                  ), C.splice
             ])
             #)
         ]))
     mod_context = model(att_context)
     return C.as_block(mod_context, [(att_context, attention_context)],
                       'modeling_layer', 'modeling_layer')
Example No. 21
    def input_layer(self, embed, cgw,cnw,cc,qgw,qnw,qc):
        cgw_ph = C.placeholder()
        cnw_ph = C.placeholder()
        cc_ph  = C.placeholder()
        qgw_ph = C.placeholder()
        qnw_ph = C.placeholder()
        qc_ph  = C.placeholder()
  
        
        input_chars = C.placeholder(shape=(1,self.word_size,self.c_dim))
        input_glove_words = C.placeholder(shape=(self.wg_dim,))
        input_nonglove_words = C.placeholder(shape=(self.wn_dim,))

        # we need to reshape because GlobalMaxPooling/reduce_max is retaining a trailing singleton dimension
        # todo GlobalPooling/reduce_max should have a keepdims default to False
        embedded = C.splice(
            C.reshape(self.charcnn(input_chars), self.convs),
            embed(input_glove_words, input_nonglove_words), name='splice_embed')
        highway = HighwayNetwork(dim=2*self.hidden_dim, highway_layers=self.highway_layers)(embedded)
        highway_drop = C.layers.Dropout(self.dropout)(highway)
        processed = OptimizedRnnStack(self.hidden_dim, bidirectional=True, use_cudnn=self.use_cudnn, name='input_rnn')(highway_drop)
        
        qce = C.one_hot(qc_ph, num_classes=self.c_dim, sparse_output=self.use_sparse)
        cce = C.one_hot(cc_ph, num_classes=self.c_dim, sparse_output=self.use_sparse)
#        ace = C.one_hot(ac_ph, num_classes=self.c_dim, sparse_output=self.use_sparse)
                
        q_processed = processed.clone(C.CloneMethod.share, {input_chars:qce, input_glove_words:qgw_ph, input_nonglove_words:qnw_ph})
        c_processed = processed.clone(C.CloneMethod.share, {input_chars:cce, input_glove_words:cgw_ph, input_nonglove_words:cnw_ph})

        return C.as_block(
            C.combine([c_processed, q_processed]),
            [(cgw_ph, cgw),(cnw_ph, cnw),(cc_ph, cc),(qgw_ph, qgw),(qnw_ph, qnw),(qc_ph, qc)],
            'input_layer',
            'input_layer')
Example No. 22
def Block(f, op_name, name='', members={}, make_block=False): 
    if make_block: 
        inner_args = f.arguments
        args_map = [(arg, Placeholder(name=arg.name)) for arg in inner_args]
        f = as_block(f, args_map, op_name, name)
    for key in members:
        f.__dict__[key] = members[key]
    return f
Example No. 23
 def basic_network(cls, dims: int, op_name: str = '', instance_name: str = ''):
     ph = C.placeholder(dims, name='net_input')
     net = C.layers.Sequential([
             C.layers.Dense(8, C.tanh),
             C.layers.Dense(8, C.tanh),
             C.layers.Dense(dims, name='net_output'),
         ])(ph)
     return C.as_block(net, [(ph,ph)], op_name, instance_name)
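A rough usage sketch: because the block maps its placeholder to itself, the returned Function still has one free argument and is meant to be applied to a real variable afterwards. The class name Builder below is hypothetical (basic_network takes cls, so it is presumably a classmethod); cntk is assumed imported as C.

x = C.input_variable(4)
net = Builder.basic_network(4, op_name='BasicNet', instance_name='net0')  # hypothetical owning class
y = net(x)   # binds the free placeholder to x; y has shape (4,)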
Example No. 24
def positional_encoding(token_dims: int, discount_factor: float = 0.99):
    X = C.placeholder(token_dims, name='positional_encoding')
    encoder = C.layers.Recurrence(C.element_times,
                                  initial_state=1,
                                  return_full_state=True)(C.ones_like(X) *
                                                          discount_factor)
    return C.stop_gradient(
        C.as_block(encoder, [(X, X)], 'positional_encoding',
                   'positional_encoding_'))
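A rough usage sketch (assuming cntk as C and numpy as np): the encoding depends only on sequence length, so step t (0-based) of the output carries discount_factor**(t+1) broadcast over the token dimension.

x = C.sequence.input_variable(3)
pe = positional_encoding(3, discount_factor=0.9)(x)
print(pe.eval({x: np.zeros((4, 3), dtype=np.float32)}))  # steps ~ 0.9, 0.81, 0.729, 0.6561, each repeated over 3 dims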
Example No. 25
 def func(x_var):
     x = C.placeholder()
     transform_gate = C.sigmoid(C.times(x, WT, name=name + '_T') + bT)
     update = C.relu(C.times(x, WU, name=name + '_U') + bU)
     return C.as_block(
         x + transform_gate * (update - x),  # trans(x)*u(x)+(1-f(x))*x
         [(x, x_var)],
         'HighwayBlock',
         'HighwayBlock' + name)
Example No. 26
 def _func(x):
     ph = C.placeholder()
     first_out = encoder(ph)
     second_out, third_out = bilm(first_out).outputs # [#,*][1024]
     dup_first_out = C.splice(first_out, first_out) #[#,*][1024]
     s = C.softmax(scales)
     out = gamma*(s[0]*dup_first_out+s[1]*second_out+s[2]*third_out)
     return C.as_block(
         out, [(ph, x)],'Elmo', 'Elmo'
     )
Example No. 27
 def reasoning_layer(self, inputs):
     input_ph = C.placeholder()
     rnn = create_birnn(GRU(self.hidden_dim), GRU(self.hidden_dim),
                        'reasoning_gru')
     block = Sequential(
         [LayerNormalization(name='layerbn'),
          Dropout(self.dropout), rnn])
     res = block(input_ph)
     return C.as_block(res, [(input_ph, inputs)], 'reasoning layer',
                       'reasoning layer')
Example No. 28
    def input_layer(self, cgw, cnw, cc, qgw, qnw, qc):
        cgw_ph = C.placeholder()
        cnw_ph = C.placeholder()
        cc_ph = C.placeholder()
        qgw_ph = C.placeholder()
        qnw_ph = C.placeholder()
        qc_ph = C.placeholder()
        input_chars = C.placeholder(shape=(1, self.word_size, self.c_dim))
        input_glove_words = C.placeholder(shape=(self.wg_dim, ))
        input_nonglove_words = C.placeholder(shape=(self.wn_dim, ))

        qce = C.one_hot(qc_ph,
                        num_classes=self.c_dim,
                        sparse_output=self.use_sparse)
        cce = C.one_hot(cc_ph,
                        num_classes=self.c_dim,
                        sparse_output=self.use_sparse)
        word_embed = self.word_glove()(input_glove_words, input_nonglove_words)
        char_embed = self.char_glove()(input_chars)
        embeded = C.splice(word_embed,
                           C.reshape(self.charcnn(char_embed), self.convs),
                           name='splice_embeded')

        self._indrnn_builder._input_size = self.word_emb_dim + self.convs
        ind1 = [self._indrnn_builder.build(), self._indrnn_builder.build()]
        self._indrnn_builder._input_size = 2 * self.hidden_dim
        indrnns = [self._indrnn_builder.build() for _ in range(4)]
        indrnns = ind1 + indrnns

        process = C.layers.For(
            range(3), lambda i: C.layers.Sequential([
                C.layers.Dropout(self.dropout),
                (C.layers.Recurrence(indrnns[2 * i]),
                 C.layers.Recurrence(indrnns[2 * i + 1], go_backwards=True)), C
                .splice
            ]))
        processed = process(embeded)

        q_processed = processed.clone(
            C.CloneMethod.share, {
                input_chars: qce,
                input_glove_words: qgw_ph,
                input_nonglove_words: qnw_ph
            })
        c_processed = processed.clone(
            C.CloneMethod.share, {
                input_chars: cce,
                input_glove_words: cgw_ph,
                input_nonglove_words: cnw_ph
            })

        return C.as_block(C.combine([c_processed, q_processed]),
                          [(cgw_ph, cgw), (cnw_ph, cnw), (cc_ph, cc),
                           (qgw_ph, qgw), (qnw_ph, qnw),
                           (qc_ph, qc)], 'input_layer', 'input_layer')
Example No. 29
 def weighted_sum(self, inputs):
     input_ph = C.placeholder()
     weight = Sequential([
         BatchNormalization(),
         Dropout(self.dropout),
         Dense(self.hidden_dim, activation=C.tanh),
         Dense(1, bias=False), C.sequence.softmax
     ])(input_ph)  # [#,c][1]
     res = C.sequence.reduce_sum(weight * input_ph)
     return C.as_block(C.combine(res, weight), [(input_ph, inputs)],
                       'weighted sum', 'weighted sum')
Example No. 30
def convert(root_func, filter, converter):
    '''
    Clones the graph underlying root_func and in the clone substitutes
    all Functions obtained by applying 'filter', with a new Function obtained by calling the specified 'converter'

    Args:
        root_func: a root function of a graph to be cloned and converted
        filter: a lambda for filtering out the Functions to be converted
        converter: a lambda for obtaining the substitute for each of the Functions to be converted
    Returns:
        Cloned and converted Function (graph)
    '''
    # recursively convert for blocks in root_func
    blocks = C.logging.graph.depth_first_search(root_func, lambda x : type(x) == C.Function and x.root_function.is_block, depth = 0)
    for i in range(len(blocks)):
        # search for blocks again in case block input/output has been modified
        blocks1 = C.logging.graph.depth_first_search(root_func, lambda x : type(x) == C.Function and x.root_function.is_block, depth = 0)
        block = blocks1[i] # assuming depth_first_search order to be stable, so use the old index on new search results
        block_root = C.as_composite(block.block_root)
        new_block_root = convert(block_root, filter, converter)
        if new_block_root != block_root:
            block_arguments_mapping = dict(block.block_arguments_mapping)
            new_block_arguments_mapping = []
            for arg, new_arg in zip(block_root.arguments, new_block_root.arguments):
                new_block_arguments_mapping += [(new_arg, block_arguments_mapping[arg])]
            new_block = C.as_block(new_block_root, new_block_arguments_mapping, block.op_name, block.name)
            if all([x not in root_func.outputs for x in block.outputs]) or all([x in block.outputs for x in root_func.outputs]):
                root_func = root_func.clone(C.CloneMethod.share, dict(zip(block.outputs, new_block.outputs)))
            else:
                new_outputs = [new_block.outputs[block.outputs.index(x)] if x in block.outputs else None for x in root_func.outputs]
                root_func_nonreplaced = C.combine([x for x in root_func.outputs if x not in block.outputs])
                root_func_nonreplaced_clone = root_func_nonreplaced.clone(C.CloneMethod.share, dict(zip(block.outputs, new_block.outputs)))
                idx = 0
                for nonreplaced_output in root_func_nonreplaced_clone.outputs:
                    while new_outputs[idx]:
                        idx += 1
                    new_outputs[idx] = nonreplaced_output
                root_func = C.combine(new_outputs)

    # replace all Function instances under root_func that pass the specified 'filter'
    functions_to_convert = C.logging.graph.depth_first_search(root_func, filter, depth = 0)
    for function_to_convert in functions_to_convert:
        converted = converter(function_to_convert)

        if not function_to_convert.output in root_func.outputs:            
            root_func = root_func.clone(C.CloneMethod.share, {function_to_convert.output : converted.output})
        else:
            # if cudnn_rnn output is the root_func output, just use converted as root_func and no clone needed
            if len(root_func.outputs) > 1:
                root_func = C.combine([converted if x == function_to_convert.output else x for x in root_func.outputs])
            else:
                root_func = converted

    return root_func
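A rough usage sketch of the filter/converter contract (the helper names is_tanh/to_relu are made up; cntk is assumed imported as C): replace every Tanh node of a small model with ReLU applied to the same operand.

x = C.input_variable(4)
model = C.tanh(C.layers.Dense(4)(x))

is_tanh = lambda f: isinstance(f, C.Function) and f.op_name == 'Tanh'
to_relu = lambda f: C.relu(f.inputs[0])   # build the substitute from the matched node's operand

converted_model = convert(model, is_tanh, to_relu)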
Example No. 31
    def attention_layer(self, context, query, layer):

        q_processed = C.placeholder(shape=(2*self.hidden_dim,))
        p_processed = C.placeholder(shape=(2*self.hidden_dim,))

        qvw, qvw_mask = C.sequence.unpack(q_processed, padding_value=0).outputs

        wq = C.parameter(shape=(2*self.hidden_dim, 2*self.hidden_dim), init=C.glorot_uniform())
        wp = C.parameter(shape=(2*self.hidden_dim, 2*self.hidden_dim), init=C.glorot_uniform())
        wg = C.parameter(shape=(8*self.hidden_dim, 8*self.hidden_dim), init=C.glorot_uniform())
        v = C.parameter(shape=(2*self.hidden_dim, 1), init=C.glorot_uniform())

        # seq[tensor[2d]] p_len x 2d
        wpt = C.reshape(C.times(p_processed, wp), (-1, 2*self.hidden_dim))

        # q_len x 2d
        wqt = C.reshape(C.times(qvw, wq), (-1, 2*self.hidden_dim))
        
        # seq[tensor[q_len]]
        S = C.reshape(C.times(C.tanh(C.sequence.broadcast_as(wqt, p_processed) + wpt), v), (-1))

        qvw_mask_expanded = C.sequence.broadcast_as(qvw_mask, p_processed)

        # seq[tensor[q_len]]
        S = C.element_select(qvw_mask_expanded, S, C.constant(-1e+30))
        
        # seq[tensor[q_len]]
        A = C.softmax(S, axis=0)

        # seq[tensor[2d]]
        swap_qvw = C.swapaxes(qvw)
        cq = C.reshape(C.reduce_sum(A * C.sequence.broadcast_as(swap_qvw, A), axis=1), (-1))

        # seq[tensor[4d]]
        uc_concat = C.splice(p_processed, cq, p_processed * cq, cq * cq)
        
        # seq[tensor[4d]]
        gt = C.tanh(C.times(uc_concat, wg))
        
        # seq[tensor[4d]]
        uc_concat_star = gt * uc_concat
 
        # seq[tensor[4d]]
        vp = C.layers.Sequential([
            C.layers.Dropout(self.dropout),
            OptimizedRnnStack(self.hidden_dim, bidirectional=True, 
                use_cudnn=self.use_cudnn, name=layer+'_attention_rnn')])(uc_concat_star)
        
        return C.as_block(
            vp,
            [(p_processed, context), (q_processed, query)],
            'attention_layer',
            'attention_layer')
Example No. 32
 def wrap_in_block(fun_args, name):
     block_args = [placeholder(name=arg.name) for arg in fun_args]  # placeholders inside the BlockFunction
     combined_block_args = combine(block_args)  # the content of the BlockFunction
     arg_map = list(zip(block_args, fun_args))  # after wrapping, the block_args map to args
     combined_args = as_block(composite=combined_block_args, block_arguments_map=arg_map, block_op_name=name)
     return combined_args
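A rough usage sketch (with cntk imported as C and the ops imported as in the snippet): wrapping two inputs yields a named BlockFunction whose outputs simply pass the bound arguments through.

a = C.input_variable(2, name='a')
b = C.input_variable(2, name='b')
wrapped = wrap_in_block([a, b], 'identity_pair')
print(wrapped.outputs)   # two outputs, one per wrapped argument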
Example No. 33
    def convolution(operand):
        
        bcv_operand_p = C.placeholder(
            operand.shape, operand.dynamic_axes, name="operand")
        
        bcv = C.convolution(
                    CustomMultibit(W, 1), 
                    CustomMultibit(bcv_operand_p, 1), 
                    auto_padding=[False, pad, pad], 
                    strides=[strides])

        return  C.as_block(bcv, [(bcv_operand_p, operand)], name)
Example No. 34
    def convolution(operand):

        bcv_operand_p = C.placeholder(operand.shape,
                                      operand.dynamic_axes,
                                      name="operand")

        bcv = C.convolution(CustomMultibit(W, 1),
                            CustomMultibit(bcv_operand_p, 1),
                            auto_padding=[False, pad, pad],
                            strides=[strides])

        return C.as_block(bcv, [(bcv_operand_p, operand)], name)
Example No. 35
 def output_layer(self, query, match_context):
     q_processed = C.placeholder(shape=(2*self.hidden_dim,))
     mat_context = C.placeholder(shape=(2*self.hidden_dim,))
     
     #output layer
     r_q = question_pooling(q_processed, 2*self.hidden_dim) #shape n*(2*self.hidden_dim)
     p1_logits = attention_weight(mat_context, r_q, 2*self.hidden_dim)
     attention_pool = C.sequence.reduce_sum(p1_logits * mat_context)
     state = C.layers.GRU(2*self.hidden_dim)(attention_pool, r_q)
     p2_logits = attention_weight(mat_context, state, 2*self.hidden_dim)
     
     @C.Function
     def start_ave_point(p1_logits, p2_logits, point):
         @C.Function
         def start_ave(last, now):
             now = now + last - last
             new_start = now * C.sequence.gather(p2_logits, point)
             point = C.sequence.future_value(point)
             return new_start
         start_logits_ave = C.layers.Recurrence(start_ave)(p1_logits)
         return start_logits_ave
     point = C.sequence.is_first(p1_logits)
     point = C.layers.Sequential([For(range(2), lambda: C.layers.Recurrence(C.plus))])(point)
     point = C.greater(C.constant(16), point)
     start_logits_ave = start_ave_point(p1_logits, p2_logits, point)
     
     @C.Function
     def end_ave_point(p1_logits, p2_logits, point):
         @C.Function
         def end_ave(last, now):
             now = now + last - last
             new_end = now * C.sequence.gather(p2_logits, point)
             point = C.sequence.past_value(point)
             return new_end
         end_logits_ave = C.layers.Recurrence(end_ave, go_backwards=True)(p2_logits)
         return end_logits_ave
     point = C.sequence.is_last(p1_logits)
     point = C.layers.Sequential([For(range(2), lambda: C.layers.Recurrence(C.plus, go_backwards=True))])(point)
     point = C.greater(C.constant(16),point)
     end_logits_ave = end_ave_point(p1_logits, p2_logits, point)
     
     start_logits = seq_hardmax(start_logits_ave)
     end_logits = seq_hardmax(end_logits_ave)
     '''
     start_logits = seq_hardmax(p1_logits)
     end_logits = seq_hardmax(p2_logits)
     '''
     return C.as_block(
         C.combine([start_logits, end_logits]),
         [(q_processed, query), (mat_context, match_context)],
         'output_layer',
         'output_layer')
Example No. 36
 def func(x_var):
     x  = C.placeholder()
     WT = C.Parameter((dim,dim,), init=transform_weight_initializer, name=name+'_WT')
     bT = C.Parameter(dim,        init=transform_bias_initializer,   name=name+'_bT')
     WU = C.Parameter((dim,dim,), init=update_weight_initializer,    name=name+'_WU')
     bU = C.Parameter(dim,        init=update_bias_initializer,      name=name+'_bU')
     transform_gate = C.sigmoid(C.times(x, WT, name=name+'_T') + bT)
     update = C.relu(C.times(x, WU, name=name+'_U') + bU)
     return C.as_block(
         x + transform_gate * (update - x),
         [(x, x_var)],
         'HighwayBlock',
         'HighwayBlock'+name)
Example No. 37
    def input_layer(self, cgw, cc, qgw, qc, qnw, cnw):
        cgw_ph = C.placeholder()
        cnw_ph = C.placeholder()
        cc_ph = C.placeholder()
        qgw_ph = C.placeholder()
        qnw_ph = C.placeholder()
        qc_ph = C.placeholder()

        input_chars = C.placeholder(shape=(1, self.word_size, self.c_dim))
        input_glove_words = C.placeholder(shape=(self.wg_dim, ))
        input_nonglove_words = C.placeholder(shape=(self.wn_dim, ))

        embedded = C.splice(C.reshape(self.charcnn(input_chars), self.convs),
                            self.embed()(input_glove_words,
                                         input_nonglove_words),
                            name='splice_embed')

        highway = HighwayNetwork(dim=self.elmo_dim + self.hidden_dim +
                                 self.convs,
                                 highway_layers=self.highway_layers)(embedded)
        highway_drop = C.layers.Dropout(self.dropout)(highway)
        processed = OptimizedRnnStack(self.hidden_dim,
                                      num_layers=1,
                                      bidirectional=True,
                                      use_cudnn=self.use_cudnn,
                                      name='input_rnn')(highway_drop)

        qce = C.one_hot(qc_ph,
                        num_classes=self.c_dim,
                        sparse_output=self.use_sparse)
        cce = C.one_hot(cc_ph,
                        num_classes=self.c_dim,
                        sparse_output=self.use_sparse)

        q_processed = processed.clone(
            C.CloneMethod.share, {
                input_chars: qce,
                input_glove_words: qgw_ph,
                input_nonglove_words: qnw_ph
            })
        c_processed = processed.clone(
            C.CloneMethod.share, {
                input_chars: cce,
                input_glove_words: cgw_ph,
                input_nonglove_words: cnw_ph
            })

        return C.as_block(C.combine([c_processed, q_processed]),
                          [(cgw_ph, cgw), (cc_ph, cc), (qgw_ph, qgw),
                           (qc_ph, qc), (qnw_ph, qnw),
                           (cnw_ph, cnw)], 'input_layer', 'input_layer')
Example No. 38
def test_model_one_output_of_multi_output_function():
    input_dim = 2
    proj_dim = 11
    x = input_variable((input_dim,))

    x_placeholder = placeholder_variable()
    w = parameter((input_dim, proj_dim))
    b = parameter((proj_dim,))
    proj = times(x_placeholder, w)
    proj_plus_bias = proj + b
    combined_model = as_block(combine([proj, proj_plus_bias]), [(x_placeholder, x)], 'dense_op')

    labels = input_variable((proj_dim,))
    lr_schedule = learning_rate_schedule(0.003, UnitType.sample)
    ce = cross_entropy_with_softmax(combined_model.outputs[0], labels)
    pe = classification_error(combined_model.outputs[0], labels)
    trainer_multitask = Trainer(combined_model.outputs[0], (ce, pe), sgd(ce.parameters, lr=lr_schedule))
Example No. 39
 def wrap_in_block(fun_args, name):
     block_args = [placeholder_variable(name=arg.name) for arg in fun_args]  # placeholders inside the BlockFunction
     combined_block_args = combine(block_args)                               # the content of the BlockFunction
     arg_map = list(zip(block_args, fun_args))                               # after wrapping, the block_args map to args
     combined_args = as_block(composite=combined_block_args, block_arguments_map=arg_map, block_op_name=name)
     return combined_args
Example No. 40
def create_rpn(conv_out, scaled_gt_boxes, im_info, add_loss_functions=True,
               proposal_layer_param_string=None, conv_bias_init=0.0):
    '''
    Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").

    Args:
        conv_out:        The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        im_info:         A CNTK variable or constant containing
                         (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
                         e.g. (1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000
        add_loss_functions: If set to True rpn_losses will be returned, otherwise None is returned for the losses
        proposal_layer_param_string: A yaml parameter string that is passed to the proposal layer.

    Returns:
        rpn_rois - the proposed ROIs
        rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness)
    '''

    # RPN network
    # init = 'normal', initValueScale = 0.01, initBias = 0.1
    num_channels = cfg["CNTK"].RPN_NUM_CHANNELS
    rpn_conv_3x3 = Convolution((3, 3), num_channels, activation=relu, pad=True, strides=1,
                                init = normal(scale=0.01), init_bias=conv_bias_init)(conv_out)
    rpn_cls_score = Convolution((1, 1), 18, activation=None, name="rpn_cls_score",
                                init = normal(scale=0.01), init_bias=conv_bias_init)(rpn_conv_3x3)  # 2(bg/fg)  * 9(anchors)
    rpn_bbox_pred = Convolution((1, 1), 36, activation=None, name="rpn_bbox_pred",
                                init = normal(scale=0.01), init_bias=conv_bias_init)(rpn_conv_3x3)  # 4(coords) * 9(anchors)

    # apply softmax to get (bg, fg) probabilities and reshape predictions back to grid of (18, H, W)
    num_predictions = int(rpn_cls_score.shape[0] / 2)
    rpn_cls_score_rshp = reshape(rpn_cls_score, (2, num_predictions, rpn_cls_score.shape[1], rpn_cls_score.shape[2]), name="rpn_cls_score_rshp")
    p_rpn_cls_score_rshp = cntk.placeholder()
    rpn_cls_sm = softmax(p_rpn_cls_score_rshp, axis=0)
    rpn_cls_prob = cntk.as_block(rpn_cls_sm, [(p_rpn_cls_score_rshp, rpn_cls_score_rshp)], 'Softmax', 'rpn_cls_prob')
    rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape, name="rpn_cls_prob_reshape")

    # proposal layer
    rpn_rois_raw = user_function(ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, param_str=proposal_layer_param_string))
    rpn_rois = alias(rpn_rois_raw, name='rpn_rois')

    rpn_losses = None
    if(add_loss_functions):
        # RPN targets
        # Comment: rpn_cls_score is only passed   vvv   to get width and height of the conv feature map ...
        atl = user_function(AnchorTargetLayer(rpn_cls_score, scaled_gt_boxes, im_info, param_str=proposal_layer_param_string))
        rpn_labels = atl.outputs[0]
        rpn_bbox_targets = atl.outputs[1]
        rpn_bbox_inside_weights = atl.outputs[2]

        # classification loss
        p_rpn_labels = cntk.placeholder()
        p_rpn_cls_score_rshp = cntk.placeholder()

        keeps = cntk.greater_equal(p_rpn_labels, 0.0)
        fg_labels = element_times(p_rpn_labels, keeps, name="fg_targets")
        bg_labels = minus(1, fg_labels, name="bg_targets")
        rpn_labels_ignore = splice(bg_labels, fg_labels, axis=0)
        rpn_ce = cross_entropy_with_softmax(p_rpn_cls_score_rshp, rpn_labels_ignore, axis=0)
        rpn_loss_cls = element_times(rpn_ce, keeps)

        # The terms that are accounted for in the cls loss are those that have a label >= 0
        cls_num_terms = reduce_sum(keeps)
        cls_normalization_factor = 1.0 / cls_num_terms
        normalized_rpn_cls_loss = reduce_sum(rpn_loss_cls) * cls_normalization_factor

        reduced_rpn_loss_cls = cntk.as_block(normalized_rpn_cls_loss,
                                         [(p_rpn_labels, rpn_labels), (p_rpn_cls_score_rshp, rpn_cls_score_rshp)],
                                         'CE_with_ignore', 'norm_rpn_cls_loss')

        # regression loss
        p_rpn_bbox_pred = cntk.placeholder()
        p_rpn_bbox_targets = cntk.placeholder()
        p_rpn_bbox_inside_weights = cntk.placeholder()
        rpn_loss_bbox = SmoothL1Loss(cfg["CNTK"].SIGMA_RPN_L1, p_rpn_bbox_pred, p_rpn_bbox_targets, p_rpn_bbox_inside_weights, 1.0)
        # The bbox loss is normalized by the rpn batch size
        bbox_normalization_factor = 1.0 / cfg["TRAIN"].RPN_BATCHSIZE
        normalized_rpn_bbox_loss = reduce_sum(rpn_loss_bbox) * bbox_normalization_factor

        reduced_rpn_loss_bbox = cntk.as_block(normalized_rpn_bbox_loss,
                                          [(p_rpn_bbox_pred, rpn_bbox_pred), (p_rpn_bbox_targets, rpn_bbox_targets),
                                           (p_rpn_bbox_inside_weights, rpn_bbox_inside_weights)],
                                          'SmoothL1Loss', 'norm_rpn_bbox_loss')

        rpn_losses = plus(reduced_rpn_loss_cls, reduced_rpn_loss_bbox, name="rpn_losses")

    return rpn_rois, rpn_losses
Example No. 41
def convert(root_func, filter, converter):
    '''
    Clones the graph underlying root_func and in the clone substitutes
    all Functions obtained by applying 'filter', with a new Function obtained by calling the specified 'converter'

    Args:
        root_func: a root function of a graph to be cloned and converted
        filter: a lambda for filtering out the Functions to be converted
        converter: a lambda for obtaining the substitute for each of the Functions to be converted
    Returns:
        Cloned and converted Function (graph)
    '''
    # recursively convert for blocks in root_func
    blocks = C.logging.graph.depth_first_search(root_func, lambda x : type(x) == C.Function and x.root_function.is_block, depth = 0)
    for i in range(len(blocks)):
        # search for blocks again in case block input/output has been modified
        blocks1 = C.logging.graph.depth_first_search(root_func, lambda x : type(x) == C.Function and x.root_function.is_block, depth = 0)
        block = blocks1[i] # assuming depth_first_search order to be stable, so use the old index on new search results
        block_root = C.as_composite(block.block_root)
        new_block_root = convert(block_root, filter, converter)
        if new_block_root != block_root:
            block_arguments_mapping = dict(block.block_arguments_mapping)
            new_block_arguments_mapping = []
            for arg, new_arg in zip(block_root.arguments, new_block_root.arguments):
                new_block_arguments_mapping += [(new_arg, block_arguments_mapping[arg])]
            new_block = C.as_block(new_block_root, new_block_arguments_mapping, block.op_name, block.name)
            if all([x not in root_func.outputs for x in block.outputs]) or all([x in block.outputs for x in root_func.outputs]):
                root_func = root_func.clone(C.CloneMethod.share, dict(zip(block.outputs, new_block.outputs)))
            else:
                new_outputs = [new_block.outputs[block.outputs.index(x)] if x in block.outputs else None for x in root_func.outputs]
                root_func_nonreplaced = C.combine([x for x in root_func.outputs if x not in block.outputs])
                root_func_nonreplaced_clone = root_func_nonreplaced.clone(C.CloneMethod.share, dict(zip(block.outputs, new_block.outputs)))
                idx = 0
                for nonreplaced_output in root_func_nonreplaced_clone.outputs:
                    while new_outputs[idx]:
                        idx += 1
                    new_outputs[idx] = nonreplaced_output
                root_func = C.combine(new_outputs)

    # replace all Function instances under root_func that pass the specified 'filter'
    functions_to_convert = C.logging.graph.depth_first_search(root_func, filter, depth = 0)
    for i in range(len(functions_to_convert)):
        # The graph could be modified already by this function, so we need to rescan to the new set.
        functions_to_convert1 = C.logging.graph.depth_first_search(root_func, filter, depth = 0)
        # We are using a filter passed in by the caller. So once a function is converted, we may not
        # get the same number of functions again, so we need to use correct index depending on the new size.
        index = 0
        if len(functions_to_convert) > len(functions_to_convert1):
            assert(len(functions_to_convert) - len(functions_to_convert1) == i) # Only one conversion at a time.
            # index = 0 will work for this case, we are picking the first function from the new list.
        elif len(functions_to_convert) == len(functions_to_convert1):
            index = i # here we pick the current index of the for loop.
        else:
            raise RuntimeError("The conversion adds another possible conversion(s). Stopping infinite conversions.")

        function_to_convert = functions_to_convert1[index]
        converted = converter(function_to_convert)

        if not function_to_convert.output in root_func.outputs:            
            root_func = root_func.clone(C.CloneMethod.share, {function_to_convert.output : converted.output})
        else:
            # if cudnn_rnn output is the root_func output, just use converted as root_func and no clone needed
            if len(root_func.outputs) > 1:
                root_func = C.combine([converted if x == function_to_convert.output else x for x in root_func.outputs])
            else:
                root_func = converted

    return root_func