Example #1
def spatial_attention(cost_volume):
    feature = 4 * 9
    k = 9
    label = 9
    dres0 = convbn_3d(cost_volume, feature // 2, 3, 1)
    dres0 = Activation('relu')(dres0)
    dres0 = convbn_3d(dres0, 1, 3, 1)
    cost0 = Activation('relu')(dres0)

    cost0 = Lambda(lambda x: K.permute_dimensions(K.squeeze(x, -1),
                                                  (0, 2, 3, 1)))(cost0)

    cost1 = convbn(cost0, label // 2, (1, k), 1, 1)
    cost1 = Activation('relu')(cost1)
    cost1 = convbn(cost1, 1, (k, 1), 1, 1)
    cost1 = Activation('relu')(cost1)

    cost2 = convbn(cost0, label // 2, (k, 1), 1, 1)
    cost2 = Activation('relu')(cost2)
    cost2 = convbn(cost2, 1, (1, k), 1, 1)
    cost2 = Activation('relu')(cost2)

    cost = add([cost1, cost2])
    cost = Activation('sigmoid')(cost)

    cost = Lambda(lambda y: K.repeat_elements(K.expand_dims(y, 1), 9, 1))(cost)
    cost = Lambda(lambda y: K.repeat_elements(y, feature, 4))(cost)
    return multiply([cost, cost_volume])
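A minimal usage sketch (not part of the original listing), assuming the repository's convbn/convbn_3d helpers are importable and the cost volume carries 9 disparity hypotheses with 4 * 9 = 36 feature channels; the 32x32 patch size is illustrative:

# Hypothetical wiring with the Keras functional API; all shapes are assumptions.
from keras.layers import Input
from keras.models import Model

cv_in = Input(shape=(9, 32, 32, 36))   # (disparity, H, W, channels)
cv_att = spatial_attention(cv_in)      # same shape, re-weighted by the learned spatial map
demo = Model(inputs=cv_in, outputs=cv_att)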
Example #2
def define_AttMLFNet(sz_input, sz_input2, view_n, learning_rate):
    """ 4 branches inputs"""
    input_list = []
    for i in range(len(view_n) * 4):
        input_list.append(Input(shape=(sz_input, sz_input2, 1)))
    """ 4 branches features"""
    feature_extraction_layer = feature_extraction(sz_input, sz_input2)
    feature_list = []
    for i in range(len(view_n) * 4):
        feature_list.append(feature_extraction_layer(input_list[i]))
    feature_v_list = []
    feature_h_list = []
    feature_45_list = []
    feature_135_list = []
    for i in range(9):
        feature_h_list.append(feature_list[i])
    for i in range(9, 18):
        feature_v_list.append(feature_list[i])
    for i in range(18, 27):
        feature_45_list.append(feature_list[i])
    for i in range(27, len(feature_list)):
        feature_135_list.append(feature_list[i])
    """ cost volume """
    cv_h = Lambda(_get_h_CostVolume_)(feature_h_list)
    cv_v = Lambda(_get_v_CostVolume_)(feature_v_list)
    cv_45 = Lambda(_get_45_CostVolume_)(feature_45_list)
    cv_135 = Lambda(_get_135_CostVolume_)(feature_135_list)
    """ intra branch """
    cv_h_3d, cv_h_ca = to_3d_h(cv_h)
    cv_v_3d, cv_v_ca = to_3d_v(cv_v)
    cv_45_3d, cv_45_ca = to_3d_45(cv_45)
    cv_135_3d, cv_135_ca = to_3d_135(cv_135)
    """ inter branch """
    cv, attention_4 = branch_attention(
        multiply([cv_h_3d, cv_v_3d, cv_45_3d, cv_135_3d]), cv_h_ca, cv_v_ca,
        cv_45_ca, cv_135_ca)
    """ cost volume regression """
    cost = basic(cv)

    cost = Lambda(lambda x: K.permute_dimensions(K.squeeze(x, -1),
                                                 (0, 2, 3, 1)))(cost)
    pred = Activation('softmax')(cost)
    pred = Lambda(disparityregression)(pred)

    model = Model(inputs=input_list, outputs=[pred])

    model.summary()

    opt = Adam(lr=learning_rate)

    model.compile(optimizer=opt, loss='mae')

    return model
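A hedged usage sketch (not from the original source): the four angular branches each consume len(view_n) views, so the compiled model expects 4 * len(view_n) grayscale inputs. The values below are illustrative, not taken from the paper or repository:

# Hypothetical call; 9 views per branch and 32x32 patches are assumptions.
model = define_AttMLFNet(sz_input=32, sz_input2=32, view_n=list(range(9)), learning_rate=1e-3)
# model.fit / model.predict then expect a list of 4 * 9 = 36 arrays of shape (batch, 32, 32, 1).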
Example #3
def to_3d_135(cost_volume_135):
    feature = 4 * 9
    channel_135 = GlobalAveragePooling3D(
        data_format='channels_last')(cost_volume_135)
    channel_135 = Lambda(lambda y: K.expand_dims(
        K.expand_dims(K.expand_dims(y, 1), 1), 1))(channel_135)
    channel_135 = Conv3D(feature // 2,
                         1,
                         1,
                         'same',
                         data_format='channels_last')(channel_135)
    channel_135 = Activation('relu')(channel_135)
    channel_135 = Conv3D(3, 1, 1, 'same',
                         data_format='channels_last')(channel_135)
    channel_135 = Activation('sigmoid')(channel_135)
    channel_135 = Lambda(lambda y: K.concatenate([
        y[:, :, :, :, 0:1], y[:, :, :, :, 0:1], y[:, :, :, :, 0:1],
        y[:, :, :, :, 0:1], y[:, :, :, :, 1:2], y[:, :, :, :, 2:3],
        y[:, :, :, :, 2:3], y[:, :, :, :, 2:3], y[:, :, :, :, 2:3]
    ],
                                                 axis=-1))(channel_135)
    channel_135 = Lambda(lambda y: K.reshape(y, (K.shape(y)[0], 1, 1, 1, 9)))(
        channel_135)
    channel_135 = Lambda(lambda y: K.repeat_elements(y, 4, -1))(channel_135)
    cv_135_tmp = multiply([channel_135, cost_volume_135])
    cv_135_tmp = Conv3D(feature // 2, 1, 1, 'same',
                        data_format='channels_last')(cv_135_tmp)
    cv_135_tmp = Activation('relu')(cv_135_tmp)
    cv_135_tmp = Conv3D(3, 1, 1, 'same',
                        data_format='channels_last')(cv_135_tmp)
    cv_135_tmp = Activation('sigmoid')(cv_135_tmp)
    attention_135 = Lambda(lambda y: K.concatenate([
        y[:, :, :, :, 0:1], y[:, :, :, :, 0:1], y[:, :, :, :, 0:1],
        y[:, :, :, :, 0:1], y[:, :, :, :, 1:2], y[:, :, :, :, 2:3],
        y[:, :, :, :, 2:3], y[:, :, :, :, 2:3], y[:, :, :, :, 2:3]
    ],
                                                   axis=-1))(cv_135_tmp)
    attention_135 = Lambda(lambda y: K.repeat_elements(y, 4, -1))(
        attention_135)
    cv_135_multi = multiply([attention_135, cost_volume_135])
    # 3-D regression stack: each conv refines the previous output and
    # progressively reduces the channel count to a single-channel cost slice.
    dres3 = convbn_3d(cv_135_multi, feature, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, feature // 2, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, feature // 2, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, feature // 4, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, 1, 3, 1)
    cost3 = Activation('relu')(dres3)
    cost3 = Lambda(lambda x: K.permute_dimensions(K.squeeze(x, -1),
                                                  (0, 2, 3, 1)))(cost3)
    return cost3, cv_135_multi
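The two K.concatenate lambdas above appear to spread a 3-channel attention signal over the 9 views of the 135° branch (channel 0 gating the first four views, channel 1 the centre view, channel 2 the last four), after which K.repeat_elements covers the 4 feature channels per view. A self-contained NumPy sketch of that broadcast, with illustrative shapes:

import numpy as np

att3 = np.random.rand(2, 9, 32, 32, 3)   # (B, D, H, W, 3); sizes are illustrative
idx = [0, 0, 0, 0, 1, 2, 2, 2, 2]        # same view mapping as the K.concatenate call
att9 = att3[..., idx]                    # (B, D, H, W, 9): one weight per view
att36 = np.repeat(att9, 4, axis=-1)      # matches K.repeat_elements(y, 4, -1)
print(att36.shape)                       # (2, 9, 32, 32, 36)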
Example #4
def define_LFattNet(sz_input, sz_input2, view_n, learning_rate):
    """ 81 inputs"""
    input_list = []
    for i in range(len(view_n) * len(view_n)):
        print('input ' + str(i))
        input_list.append(Input(shape=(sz_input, sz_input2, 1)))
    """ 81 features"""
    feature_extraction_layer = feature_extraction(sz_input, sz_input2)

    feature_list = []
    for i in range(len(view_n) * len(view_n)):
        print('feature ' + str(i))
        feature_list.append(feature_extraction_layer(input_list[i]))
    """ cost volume """
    cv = Lambda(_getCostVolume_)(feature_list)
    """ channel attention """
    cv, attention = channel_attention(cv)
    """ cost volume regression """
    cost = basic(cv)
    cost = Lambda(lambda x: K.permute_dimensions(K.squeeze(x, -1),
                                                 (0, 2, 3, 1)))(cost)
    pred = Activation('softmax')(cost)

    pred = Lambda(disparityregression)(pred)

    # when training use below
    # model = Model(inputs=input_list, outputs=[pred])

    # when evaluation use below
    model = Model(inputs=input_list, outputs=[pred, attention])

    model.summary()

    opt = Adam(lr=learning_rate)

    model.compile(optimizer=opt, loss='mae')

    return model
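A hedged usage sketch (not part of the original listing): the network takes one input per sub-aperture view, i.e. len(view_n) ** 2 = 81 inputs for a 9x9 light field. The patch size and learning rate below are illustrative:

# Hypothetical call; 9x9 views and 32x32 patches are assumptions.
model = define_LFattNet(sz_input=32, sz_input2=32, view_n=list(range(9)), learning_rate=1e-3)
# With the evaluation configuration compiled above, model.predict returns [pred, attention].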
Example #5
    def _attention_model(self, a, h_prev, Ex_t):
        with tf.variable_scope(self.my_scope) as var_scope:
            with tf.name_scope(var_scope.original_name_scope):
                with tf.variable_scope('AttentionModel'):
                    CONF = self.C
                    B = self.ActualBatchSize
                    L = CONF.L
                    D = CONF.D
                    h = h_prev
                    n = self.output_size
                    m = CONF.m

                    self.assertOutputShape(h_prev)
                    assert K.int_shape(a) == (B, L, D)
                    assert K.int_shape(h_prev) == (B, n)
                    assert K.int_shape(Ex_t) == (B, m)

                    if CONF.att_model in ('MLP_shared', '1x1_conv'):
                        """
                        Here we'll effectively create L MLP stacks all sharing the same weights. Each
                        stack receives a concatenated vector of a(l) and h as input.
                        """
                        # h.shape = (B,n). Expand it to (B,1,n) and then broadcast to (B,L,n) in order
                        # to concatenate with feature vectors of 'a' whose shape=(B,L,D)
                        h = tf.identity(K.tile(K.expand_dims(h, axis=1),
                                               (1, L, 1)),
                                        name='h_t-1')
                        a = tf.identity(a, name='a')
                        if CONF.feed_clock_to_att:
                            assert CONF.build_scanning_RNN, 'Attention model can take Ex_t only in a scanning-LSTM'
                            # Ex_t.shape = (B,m). Expand it to (B,1,m) and then broadcast to (B,L,m) in order
                            # to concatenate with feature vectors of 'a' whose shape=(B,L,D)
                            x = tf.identity(K.tile(K.expand_dims(Ex_t, axis=1),
                                                   (1, L, 1)),
                                            name='Ex_t')
                            # Concatenate a, h and x. Final shape = (B, L, D+n+m)
                            att_inp = tf.concat([a, h, x], -1,
                                                name='ai_h_x')  # (B, L, D+n+m)
                            assert K.int_shape(att_inp) == (B, L, D + n + m)
                        else:
                            # Concatenate a and h. Final shape = (B, L, D+n)
                            att_inp = tf.concat([a, h], -1,
                                                name='ai_h')  # (B, L, D+n)
                            assert K.int_shape(att_inp) == (B, L, D + n)

                        if CONF.att_model == 'MLP_shared':
                            ## For #layers > 1 this implementation will end up being different from the paper's implementation.
                            ## Below is how it is implemented in the code released by the authors of the paper:
                            ##     for i in range(1, CONF.att_a_layers+1):
                            ##         if not last_layer:
                            ##              a = Dense(CONF['att_a_%d_n'%(i,)], activation=tanh)(a)
                            ##         else: # last-layer
                            ##              a = AffineTransform(CONF['att_a_%d_n'%(i,)])(a)
                            ##     h = AffineTransform(CONF['att_h_%d_n'%(i,)])(h)
                            ##     ah = a + K.expand_dims(h, axis=1)
                            ##     ah = tanh(ah)
                            ##     alpha = Dense(softmax_layer_params, activation=softmax)(ah)

                            alpha_1_ = tfc.MLPStack(CONF.att_layers)(
                                att_inp)  # (B, L, 1)
                            assert K.int_shape(alpha_1_) == (B, L, 1)
                            alpha_ = K.squeeze(alpha_1_, axis=2)  # output shape = (B, L)
                            assert K.int_shape(alpha_) == (B, L)

                        elif CONF.att_model == '1x1_conv':
                            """
                            NOTE: The above model ('MLP_shared') is tantamount to a 1x1 convolution over the
                            1xL-shaped (L = H.W) convnet features with num_channels = D, i.e. an input shape of (H, W, C) or (1, L, D),
                            using 'dimctx' kernels of size (1, 1) and stride 1, resulting in an output shape of (1, L, dimctx)
                            [or (B, 1, L, dimctx) with the batch dimension included].
                            This option provides such a convnet layer implementation (which turns out not to be faster than MLP_shared).
                            """
                            att_inp = tf.expand_dims(att_inp, axis=1)
                            alpha_1_ = tfc.ConvStack(
                                CONF.att_layers,
                                (B, 1, L, D + self.output_size))(att_inp)
                            assert K.int_shape(alpha_1_) == (B, 1, L, 1)
                            alpha_ = tf.squeeze(alpha_1_, axis=[1, 3])  # (B, L)
                            assert K.int_shape(alpha_) == (B, L)

                    elif CONF.att_model == 'MLP_full':  # MLP: weights not shared across L
                        ## concatenate a and h_prev and pass them through a MLP. This is different than the theano
                        ## implementation of the paper because we flatten a from (B,L,D) to (B,L*D). Hence each element
                        ## of the L*D vector receives its own weight because the effective weight matrix here would be
                        ## shape (L*D, num_dense_units) as compared to (D, num_dense_units) as in the shared_weights case

                        ## Concatenate a and h. Final shape will be (B, L*D+n)
                        with tf.variable_scope('a_h'):
                            a_ = K.batch_flatten(a)  # (B, L*D)
                            a_.set_shape(
                                (B, L * D))  # Flatten loses shape info
                            if CONF.build_scanning_RNN and CONF.feed_clock_to_att:
                                assert CONF.build_scanning_RNN, 'Attention model can take Ex_t only in a scanning-LSTM'
                                att_inp = tf.concat(
                                    [a_, h, Ex_t], -1,
                                    name="a_h_x")  # (B, L*D + n + m)
                                assert K.int_shape(att_inp) == (
                                    B, L * D + self.output_size +
                                    m), 'shape %s != %s' % (
                                        K.int_shape(att_inp),
                                        (B, L * D + self.output_size + m))
                            else:
                                att_inp = tf.concat([a_, h], -1,
                                                    name="a_h")  # (B, L*D + n)
                                assert K.int_shape(att_inp) == (
                                    B, L * D +
                                    self.output_size), 'shape %s != %s' % (
                                        K.int_shape(att_inp),
                                        (B, L * D + self.output_size))
                        alpha_ = tfc.MLPStack(CONF.att_layers)(
                            att_inp)  # (B, L)
                        assert K.int_shape(alpha_) == (B, L)

                    else:
                        raise AttributeError(
                            'Invalid value of att_model param: %s' %
                            CONF.att_model)

                    ## Softmax
                    alpha = tf.identity(tf.nn.softmax(alpha_), name='alpha')
                    assert K.int_shape(alpha) == (B, L)

                    ## Attention Modulator: Beta
                    if CONF.build_att_modulator:
                        beta = tfc.MLPStack(CONF.att_modulator,
                                            self.batch_output_shape)(h_prev)
                        beta = tf.identity(beta, name='beta')
                    else:
                        beta = tf.constant(1., shape=(B, 1), dtype=CONF.dtype)
                    assert K.int_shape(beta) == (B, 1)

                    return alpha, beta
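For context (an assumption about how the caller consumes the result, not code from this class): in soft-attention decoders of this kind, alpha is typically used to pool the annotation vectors a into a context vector, optionally scaled by the modulator beta, roughly z_t = beta * sum_l alpha[:, l] * a[:, l, :]. A self-contained NumPy sketch under that assumption:

import numpy as np

B, L, D = 2, 5, 8                               # illustrative sizes
a = np.random.rand(B, L, D)                     # annotation vectors, shape (B, L, D)
alpha = np.random.rand(B, L)
alpha /= alpha.sum(axis=1, keepdims=True)       # softmax-normalised weights, shape (B, L)
beta = np.ones((B, 1))                          # modulator; the code defaults it to 1 when disabled
z_t = beta * np.einsum('bl,bld->bd', alpha, a)  # context vector, shape (B, D)
print(z_t.shape)                                # (2, 8)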