def spatial_attention(cost_volume):
    """Apply a learned spatial attention mask to a 5-D cost volume.

    Builds a single-channel spatial map from the cost volume via two
    asymmetric-convolution branches, squashes it with a sigmoid, then
    broadcasts it back over the disparity and feature axes and multiplies
    it into the input.

    Args:
        cost_volume: 5-D tensor; assumed (batch, disp=9, h, w, feature=36)
            from the repeat axes used below — TODO confirm with caller.

    Returns:
        Tensor of the same shape as `cost_volume`, attention-weighted.
    """
    feature = 4 * 9   # channel count of the incoming cost volume
    k = 9             # kernel length of the asymmetric 1-D convolutions
    label = 9         # number of disparity labels

    # Collapse the cost volume to one channel.
    # FIX: filter counts must be ints — `feature / 2` is a float under
    # Python 3 and makes the Conv layer raise; use floor division.
    dres0 = convbn_3d(cost_volume, feature // 2, 3, 1)
    dres0 = Activation('relu')(dres0)
    dres0 = convbn_3d(dres0, 1, 3, 1)
    cost0 = Activation('relu')(dres0)

    # Drop the singleton channel and move disparity last:
    # (b, d, h, w, 1) -> (b, h, w, d)
    cost0 = Lambda(lambda x: K.permute_dimensions(K.squeeze(x, -1),
                                                  (0, 2, 3, 1)))(cost0)

    # Branch 1: (1 x k) then (k x 1) convolutions.
    cost1 = convbn(cost0, label // 2, (1, k), 1, 1)
    cost1 = Activation('relu')(cost1)
    cost1 = convbn(cost1, 1, (k, 1), 1, 1)
    cost1 = Activation('relu')(cost1)

    # Branch 2: (k x 1) then (1 x k) convolutions.
    cost2 = convbn(cost0, label // 2, (k, 1), 1, 1)
    cost2 = Activation('relu')(cost2)
    cost2 = convbn(cost2, 1, (1, k), 1, 1)
    cost2 = Activation('relu')(cost2)

    # Fuse branches and squash to (0, 1) attention weights.
    cost = add([cost1, cost2])
    cost = Activation('sigmoid')(cost)

    # Broadcast the (b, h, w, 1) mask back to the cost-volume shape:
    # repeat over the disparity axis, then over the feature axis.
    cost = Lambda(lambda y: K.repeat_elements(K.expand_dims(y, 1), 9, 1))(cost)
    cost = Lambda(lambda y: K.repeat_elements(y, feature, 4))(cost)

    return multiply([cost, cost_volume])
def atari_qnet(input_shape, num_actions, net_name, net_size):
    """Build a Q-network for Atari frames.

    `net_name` is matched by substring (case-insensitive):
    'dqn' selects a feed-forward net, 'drqn' a recurrent one; it may
    additionally contain 'lstm'/'gru' (recurrent cell type, else Dense)
    and 'dueling' (dueling value/advantage heads).

    Args:
        input_shape: shape of the input state, e.g. (h, w, frames).
        num_actions: size of the discrete action space.
        net_size: width of the Dense/LSTM/GRU layer after the conv stack.

    Returns:
        keras Model mapping state -> Q-values (one per action).

    Raises:
        ValueError: if `net_name` contains neither 'dqn' nor 'drqn'.
    """
    net_name = net_name.lower()

    # input state
    state = Input(shape=input_shape)

    # shared convolutional layers (Nature-DQN filter sizes)
    conv1_32 = Conv2D(32, (8, 8), strides=(4, 4), activation='relu')
    conv2_64 = Conv2D(64, (4, 4), strides=(2, 2), activation='relu')
    conv3_64 = Conv2D(64, (3, 3), strides=(1, 1), activation='relu')

    if 'drqn' in net_name:
        # recurrent net (drqn): move the frame axis to the front and treat
        # it as time for TimeDistributed convolutions
        lambda_perm_state = lambda x: K.permute_dimensions(x, [0, 3, 1, 2])
        perm_state = Lambda(lambda_perm_state)(state)
        dist_state = Lambda(lambda x: K.stack([x], axis=4))(perm_state)
        # extract features with `TimeDistributed` wrapped convolutional layers
        dist_conv1 = TimeDistributed(conv1_32)(dist_state)
        dist_conv2 = TimeDistributed(conv2_64)(dist_conv1)
        dist_convf = TimeDistributed(conv3_64)(dist_conv2)
        feature = TimeDistributed(Flatten())(dist_convf)
    elif 'dqn' in net_name:
        # fully connected net (dqn): plain convolutional feature extractor
        conv1 = conv1_32(state)
        conv2 = conv2_64(conv1)
        convf = conv3_64(conv2)
        feature = Flatten()(convf)
    else:
        # FIX: previously an unrecognized name fell through and raised a
        # confusing NameError on `feature` below; fail fast instead.
        raise ValueError("net_name %r must contain 'dqn' or 'drqn'"
                         % net_name)

    # network type. Dense for dqn; LSTM or GRU for drqn
    if 'lstm' in net_name:
        net_type = LSTM
    elif 'gru' in net_name:
        net_type = GRU
    else:
        net_type = Dense

    # dueling or regular dqn/drqn
    if 'dueling' in net_name:
        # dueling heads recombined as Q = V + (A - mean(A))
        value1 = net_type(net_size, activation='relu')(feature)
        adv1 = net_type(net_size, activation='relu')(feature)
        value2 = Dense(1)(value1)
        adv2 = Dense(num_actions)(adv1)
        mean_adv2 = Lambda(lambda x: K.mean(x, axis=1))(adv2)
        # `ones` broadcasts the scalar value / mean-advantage over actions
        ones = K.ones([1, num_actions])
        lambda_exp = lambda x: K.dot(K.expand_dims(x, axis=1), -ones)
        exp_mean_adv2 = Lambda(lambda_exp)(mean_adv2)
        sum_adv = add([exp_mean_adv2, adv2])
        exp_value2 = Lambda(lambda x: K.dot(x, ones))(value2)
        q_value = add([exp_value2, sum_adv])
    else:
        hid = net_type(net_size, activation='relu')(feature)
        q_value = Dense(num_actions)(hid)

    # build model
    return Model(inputs=state, outputs=q_value)
def define_AttMLFNet(sz_input, sz_input2, view_n, learning_rate):
    """Assemble the AttMLFNet model.

    Four directional branches (horizontal, vertical, 45-degree,
    135-degree) of 9 views each are turned into cost volumes, fused with
    intra- and inter-branch attention, regressed to a disparity map, and
    compiled with Adam + MAE loss.
    """
    num_inputs = len(view_n) * 4

    # one single-channel input per view, across the 4 branches
    input_list = [Input(shape=(sz_input, sz_input2, 1))
                  for _ in range(num_inputs)]

    # shared feature extractor applied to every view
    feature_extraction_layer = feature_extraction(sz_input, sz_input2)
    feature_list = [feature_extraction_layer(inp) for inp in input_list]

    # split the flat feature list into the four directional branches
    feature_h_list = feature_list[:9]
    feature_v_list = feature_list[9:18]
    feature_45_list = feature_list[18:27]
    feature_135_list = feature_list[27:]

    # per-branch cost volumes
    cv_h = Lambda(_get_h_CostVolume_)(feature_h_list)
    cv_v = Lambda(_get_v_CostVolume_)(feature_v_list)
    cv_45 = Lambda(_get_45_CostVolume_)(feature_45_list)
    cv_135 = Lambda(_get_135_CostVolume_)(feature_135_list)

    # intra-branch attention
    cv_h_3d, cv_h_ca = to_3d_h(cv_h)
    cv_v_3d, cv_v_ca = to_3d_v(cv_v)
    cv_45_3d, cv_45_ca = to_3d_45(cv_45)
    cv_135_3d, cv_135_ca = to_3d_135(cv_135)

    # inter-branch attention over the fused volume
    cv, attention_4 = branch_attention(
        multiply([cv_h_3d, cv_v_3d, cv_45_3d, cv_135_3d]),
        cv_h_ca, cv_v_ca, cv_45_ca, cv_135_ca)

    # cost volume regression -> soft-argmin disparity
    cost = basic(cv)
    cost = Lambda(lambda t: K.permute_dimensions(K.squeeze(t, -1),
                                                 (0, 2, 3, 1)))(cost)
    pred = Activation('softmax')(cost)
    pred = Lambda(disparityregression)(pred)

    model = Model(inputs=input_list, outputs=[pred])
    model.summary()
    model.compile(optimizer=Adam(lr=learning_rate), loss='mae')
    return model
def to_3d_135(cost_volume_135):
    """Channel attention and aggregation for the 135-degree branch.

    Squeeze-and-excitation style: global-average-pool the cost volume,
    predict 3 attention weights, expand them to the 9 views (4/1/4
    grouping around the center view), weight the volume, then aggregate
    to a single-channel cost map.

    Args:
        cost_volume_135: 5-D tensor; assumed (batch, d, h, w, 36)
            given `feature = 4 * 9` and the channels-last ops — TODO confirm.

    Returns:
        (cost3, cv_135_multi): aggregated cost map with disparity moved
        last, (b, h, w, d), and the attention-weighted cost volume.
    """
    feature = 4 * 9

    # squeeze: (b, d, h, w, c) -> (b, c) -> (b, 1, 1, 1, c)
    channel_135 = GlobalAveragePooling3D(
        data_format='channels_last')(cost_volume_135)
    channel_135 = Lambda(lambda y: K.expand_dims(
        K.expand_dims(K.expand_dims(y, 1), 1), 1))(channel_135)
    # FIX: filter counts must be ints — `feature / 2` is a float under
    # Python 3 and makes Conv3D raise; use floor division.
    channel_135 = Conv3D(feature // 2, 1, 1, 'same',
                         data_format='channels_last')(channel_135)
    channel_135 = Activation('relu')(channel_135)
    channel_135 = Conv3D(3, 1, 1, 'same',
                         data_format='channels_last')(channel_135)
    channel_135 = Activation('sigmoid')(channel_135)
    # expand the 3 weights to 9 views: first weight x4, middle x1, last x4
    channel_135 = Lambda(lambda y: K.concatenate([
        y[:, :, :, :, 0:1], y[:, :, :, :, 0:1], y[:, :, :, :, 0:1],
        y[:, :, :, :, 0:1], y[:, :, :, :, 1:2], y[:, :, :, :, 2:3],
        y[:, :, :, :, 2:3], y[:, :, :, :, 2:3], y[:, :, :, :, 2:3]
    ], axis=-1))(channel_135)
    channel_135 = Lambda(lambda y: K.reshape(y, (K.shape(y)[0], 1, 1, 1, 9)))(
        channel_135)
    # repeat per-view weights over the 4 feature channels of each view
    channel_135 = Lambda(lambda y: K.repeat_elements(y, 4, -1))(channel_135)
    cv_135_tmp = multiply([channel_135, cost_volume_135])

    # second, spatially-varying attention pass over the weighted volume
    cv_135_tmp = Conv3D(feature // 2, 1, 1, 'same',
                        data_format='channels_last')(cv_135_tmp)
    cv_135_tmp = Activation('relu')(cv_135_tmp)
    cv_135_tmp = Conv3D(3, 1, 1, 'same',
                        data_format='channels_last')(cv_135_tmp)
    cv_135_tmp = Activation('sigmoid')(cv_135_tmp)
    attention_135 = Lambda(lambda y: K.concatenate([
        y[:, :, :, :, 0:1], y[:, :, :, :, 0:1], y[:, :, :, :, 0:1],
        y[:, :, :, :, 0:1], y[:, :, :, :, 1:2], y[:, :, :, :, 2:3],
        y[:, :, :, :, 2:3], y[:, :, :, :, 2:3], y[:, :, :, :, 2:3]
    ], axis=-1))(cv_135_tmp)
    attention_135 = Lambda(lambda y: K.repeat_elements(y, 4, -1))(
        attention_135)
    cv_135_multi = multiply([attention_135, cost_volume_135])

    # cost aggregation down to one channel
    # NOTE(review): each convbn_3d below re-reads cv_135_multi instead of
    # chaining the previous dres3, so only the last two layers feed the
    # output — looks like a copy-paste slip, but "fixing" it would change
    # the trained architecture; left as-is deliberately.
    dres3 = convbn_3d(cv_135_multi, feature, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(cv_135_multi, feature // 2, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(cv_135_multi, feature // 2, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(cv_135_multi, feature // 4, 3, 1)
    dres3 = Activation('relu')(dres3)
    dres3 = convbn_3d(dres3, 1, 3, 1)
    cost3 = Activation('relu')(dres3)
    # (b, d, h, w, 1) -> (b, h, w, d)
    cost3 = Lambda(lambda x: K.permute_dimensions(K.squeeze(x, -1),
                                                  (0, 2, 3, 1)))(cost3)

    return cost3, cv_135_multi
def atari_acnet(input_shape, num_actions, net_name, net_size):
    """Build an actor-critic network for Atari frames.

    `net_name` is matched by substring (case-insensitive): 'lstm' or
    'gru' selects a recurrent net over the frame axis; 'fully connected'
    selects a feed-forward net.

    Args:
        input_shape: shape of the input state, e.g. (h, w, frames).
        num_actions: size of the discrete action space.
        net_size: width of the Dense/LSTM/GRU layer after the conv stack.

    Returns:
        keras Model mapping state -> [value, policy logits].

    Raises:
        ValueError: if `net_name` contains none of 'lstm', 'gru',
        'fully connected'.
    """
    net_name = net_name.lower()

    # input state
    state = Input(shape=input_shape)

    # shared convolutional layers (Nature-DQN filter sizes)
    conv1_32 = Conv2D(32, (8, 8), strides=(4, 4), activation='relu')
    conv2_64 = Conv2D(64, (4, 4), strides=(2, 2), activation='relu')
    conv3_64 = Conv2D(64, (3, 3), strides=(1, 1), activation='relu')

    if 'lstm' in net_name or 'gru' in net_name:
        # recurrent net: move the frame axis to the front and treat it as
        # time for TimeDistributed convolutions
        lambda_perm_state = lambda x: K.permute_dimensions(x, [0, 3, 1, 2])
        perm_state = Lambda(lambda_perm_state)(state)
        dist_state = Lambda(lambda x: K.stack([x], axis=4))(perm_state)
        # extract features with `TimeDistributed` wrapped convolutional layers
        dist_conv1 = TimeDistributed(conv1_32)(dist_state)
        dist_conv2 = TimeDistributed(conv2_64)(dist_conv1)
        dist_convf = TimeDistributed(conv3_64)(dist_conv2)
        feature = TimeDistributed(Flatten())(dist_convf)
        # specify net type for the following layer
        if 'lstm' in net_name:
            net_type = LSTM
        else:
            net_type = GRU
    elif 'fully connected' in net_name:
        # fully connected net: plain convolutional feature extractor
        conv1 = conv1_32(state)
        conv2 = conv2_64(conv1)
        convf = conv3_64(conv2)
        feature = Flatten()(convf)
        # specify net type for the following layer
        net_type = Dense
    else:
        # FIX: previously an unrecognized name fell through and raised a
        # confusing NameError on `feature`/`net_type`; fail fast instead.
        raise ValueError("net_name %r must contain 'lstm', 'gru', or "
                         "'fully connected'" % net_name)

    # actor (policy) and critic (value) stream
    hid = net_type(net_size, activation='relu')(feature)
    logits = Dense(num_actions, kernel_initializer='zeros')(hid)
    value = Dense(1)(hid)

    # build model
    return Model(inputs=state, outputs=[value, logits])
def call(self, x, mask=None):
    """RoI pooling forward pass (TensorFlow backend).

    Args:
        x: list of two tensors — x[0] the feature-map image, indexed
           here as (batch, rows, cols, channels), and x[1] the rois,
           indexed as (1, num_rois, 4) with each roi read as
           (x, y, w, h) in pixel units.
        mask: unused; present for Keras layer API compatibility.

    Returns:
        Tensor of shape (1, num_rois, pool_size, pool_size, nb_channels).
    """
    assert (len(x) == 2)

    # NOTE: `x` is rebound to roi coordinates inside the loop below, so
    # unpack both inputs before entering it.
    img = x[0]
    rois = x[1]

    input_shape = K.shape(img)  # unused — kept from original

    outputs = []

    for roi_idx in range(self.num_rois):
        x = rois[0, roi_idx, 0]
        y = rois[0, roi_idx, 1]
        w = rois[0, roi_idx, 2]
        h = rois[0, roi_idx, 3]

        # unused in this TF path — kept from the original implementation
        row_length = w / float(self.pool_size)
        col_length = h / float(self.pool_size)

        num_pool_regions = self.pool_size  # unused — kept from original

        #NOTE: the RoiPooling implementation differs between theano and tensorflow due to the lack of a resize op
        # in theano. The theano implementation is much less efficient and leads to long compile times

        # slicing requires integer coordinates
        x = K.cast(x, 'int32')
        y = K.cast(y, 'int32')
        w = K.cast(w, 'int32')
        h = K.cast(h, 'int32')

        # crop the roi and resize it to a fixed pool_size x pool_size grid
        # NOTE(review): tf.image.resize_images is deprecated in TF >= 1.13
        # (tf.image.resize is the replacement) — confirm the TF version.
        rs = tf.image.resize_images(img[:, y:y + h, x:x + w, :],
                                    (self.pool_size, self.pool_size))
        outputs.append(rs)

    # stack the per-roi crops and restore the (1, num_rois, ...) layout
    final_output = K.concatenate(outputs, axis=0)
    final_output = K.reshape(final_output,
                             (1, self.num_rois, self.pool_size,
                              self.pool_size, self.nb_channels))

    # identity permutation — a no-op, presumably kept for symmetry with a
    # Theano code path — TODO confirm before removing
    final_output = K.permute_dimensions(final_output, (0, 1, 2, 3, 4))

    return final_output
def define_LFattNet(sz_input, sz_input2, view_n, learning_rate):
    """Assemble the LFattNet model.

    Takes one input per sub-aperture view (len(view_n)**2 of them, 81
    for a 9x9 light field), builds a cost volume with channel attention,
    regresses a disparity map, and compiles with Adam + MAE loss. The
    evaluation model also outputs the attention maps.
    """
    num_views = len(view_n) * len(view_n)

    # one single-channel input per view
    input_list = []
    for idx in range(num_views):
        print('input ' + str(idx))
        input_list.append(Input(shape=(sz_input, sz_input2, 1)))

    # shared feature extractor applied to every view
    feature_extraction_layer = feature_extraction(sz_input, sz_input2)
    feature_list = []
    for idx in range(num_views):
        print('feature ' + str(idx))
        feature_list.append(feature_extraction_layer(input_list[idx]))

    # cost volume over all view features
    cv = Lambda(_getCostVolume_)(feature_list)

    # channel attention
    cv, attention = channel_attention(cv)

    # cost volume regression -> soft-argmin disparity
    cost = basic(cv)
    cost = Lambda(lambda t: K.permute_dimensions(K.squeeze(t, -1),
                                                 (0, 2, 3, 1)))(cost)
    pred = Activation('softmax')(cost)
    pred = Lambda(disparityregression)(pred)

    # when training use below
    # model = Model(inputs=input_list, outputs=[pred])
    # when evaluation use below
    model = Model(inputs=input_list, outputs=[pred, attention])

    model.summary()
    model.compile(optimizer=Adam(lr=learning_rate), loss='mae')
    return model
def gram_matrix(x):
    """Return the Gram matrix (channel-wise inner products) of `x`.

    Channels are moved to the leading axis and each is flattened to a
    row, so the result is a (channels, channels) matrix.
    """
    flat = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    return K.dot(flat, K.transpose(flat))
def gram_matrix(x):
    """Return the Gram matrix (channel-wise inner products) of `x`.

    Channels are moved to the leading axis and each is flattened to a
    row, so the result is a (channels, channels) matrix.
    """
    channels_first = backend.permute_dimensions(x, (2, 0, 1))
    flat = backend.batch_flatten(channels_first)
    return backend.dot(flat, backend.transpose(flat))