# Gated graph-convolution stack over per-graph feature/adjacency matrices.
# NOTE: fragment -- the trailing for-loop is truncated in this view (it stops
# mid-iteration after the first gated product).
# assumes feat/adj are aligned lists of node-feature / adjacency matrices -- TODO confirm
features_matrix = Input(feat[0].shape)
adj_matrix = Input(adj[0].shape)
# Linear projection of node features, then neighborhood aggregation via A.X.
X = Dense(32, activation = 'linear', use_bias = True)(features_matrix)
_X = Lambda(lambda x: K.batch_dot(x[0], x[1]))([adj_matrix, X])
_X = Lambda(lambda x: K.relu(x))(_X)
# Gate: sigmoid(W1.F + W2.conv) blends the convolved signal with a skip path.
gate1 = Dense(32, activation='linear',use_bias = True)(features_matrix)
gate2 = Dense(32, activation='linear',use_bias= True)(_X)
gate3 = Add()([gate1,gate2])
coeff = Lambda(lambda x: K.sigmoid(x))(gate3)
gated_X = Multiply()([coeff,_X])
# Complementary (1 - coeff) weight on a linear transform of the raw features.
gatedX = Multiply()([Lambda(lambda x: 1-x)(coeff), Dense(32,activation=None)(features_matrix)])
_X = Add()([gated_X,gatedX])
conv_output = Lambda(lambda x: K.relu(x))(_X)
# Remaining layers repeat the same gated convolution on the previous output.
for i in range(num_layers-1):
    X = Dense(32, activation = 'linear', use_bias = True)(conv_output)
    _X = Lambda(lambda x: K.batch_dot(x[0], x[1]))([adj_matrix, X])
    _X = Lambda(lambda x: K.relu(x))(_X)
    gate1 = Dense(32,activation='linear',use_bias = True)(conv_output)
    gate2 = Dense(32, activation='linear')(_X)
    gate3 = Add()([gate1,gate2])
    coeff = Lambda(lambda x: K.sigmoid(x))(gate3)
    gated_X = Multiply()([coeff,_X])
# convert train and test sets from matrices to vectors X_train = X_train.reshape(-1, original_dim) X_test = X_test.reshape(-1, original_dim) time1 = timeit.default_timer() # Encoder: q(z|x) x = Input(shape=(original_dim, )) h_q = Dense(hidden_dim, activation='relu')(x) z_mu = Dense(latent_dim)(h_q) z_log_var = Dense(latent_dim)(h_q) z_mu, z_log_var = KLDivergenceLayer()([z_mu, z_log_var]) # equivalent to sample_z() in Keras z_sigma = Lambda(lambda t: K.exp(.5 * t))(z_log_var) # z has a simple distribution N(0, 1) eps = Input(tensor=K.random_normal( shape=(K.shape(x)[0], latent_dim), mean=0.0, stddev=1.0)) z_eps = Multiply()([z_sigma, eps]) z = Add()([z_mu, z_eps]) encoder = Model(x, z_mu) # Decoder: p(x|z) decoder = Sequential([ Dense(hidden_dim, input_dim=latent_dim, activation='relu'), Dense(original_dim, activation='sigmoid') ]) x_pred = decoder(z) # train model vae = Model(inputs=[x, eps], outputs=x_pred, name='vae') vae.compile(optimizer='rmsprop', loss=nll) hist = vae.fit(X_train, X_train, shuffle=True, epochs=epochs,
def DAN_Model(img_height_size, img_width_size, n_bands, initial_conv_layers, growth_rate, dropout_rate, l_r,
              trans_down_1_size, trans_down_2_size, trans_down_3_size, trans_down_4_size,
              bottleneck_1_2_size, bottleneck_3_4_size):
    """
    This function is used to generate the Dense Attention Network (DAN) architecture as described in the paper
    'Building Extraction in Very High Resolution Imagery by Dense - Attention Networks' by
    Yang H., Wu P., Yao X., Wu Y., Wang B., Xu Y. (2018)

    Inputs:
    - img_height_size: Height of image patches to be used for model training
    - img_width_size: Width of image patches to be used for model training
    - n_bands: Number of channels contained in the image patches to be used for model training
    - initial_conv_layers: Number of convolutional layers to be used for the very first convolutional layer
    - growth_rate: Number of convolutional layers to be used for each layer in each dense block
    - dropout_rate: Dropout rate to be used during model training
    - l_r: Learning rate to be applied for the Adam optimizer
    - trans_down_1_size: Output number for feature maps for transition down level 1
    - trans_down_2_size: Output number for feature maps for transition down level 2
    - trans_down_3_size: Output number for feature maps for transition down level 3
    - trans_down_4_size: Output number for feature maps for transition down level 4
    - bottleneck_1_2_size: Output number for feature maps for bottleneck layers 1 and 2
    - bottleneck_3_4_size: Output number for feature maps for bottleneck layers 3 and 4

    Outputs:
    - dan_model: Dense Attention Network (DAN) model to be trained using input parameters and network architecture
    """
    # Channel count after each dense block: block input channels plus one
    # growth_rate per concatenated conv layer (blocks 1-2 have 2 conv layers,
    # blocks 3-5 have 3).
    block_1_size = initial_conv_layers + 2 * growth_rate
    block_2_size = trans_down_1_size + 2 * growth_rate
    block_3_size = trans_down_2_size + 3 * growth_rate
    block_4_size = trans_down_3_size + 3 * growth_rate
    block_5_size = trans_down_4_size + 3 * growth_rate

    img_input = Input(shape=(img_height_size, img_width_size, n_bands))
    batch_norm_initial = BatchNormalization()(img_input)
    conv_initial = Conv2D(initial_conv_layers, (7, 7), padding='same', activation='relu')(batch_norm_initial)

    # Dense block 1 (2 conv layers): each conv sees the concatenation of the
    # block's BN'd input and all previous layer outputs.
    batch_norm_1_1 = BatchNormalization()(conv_initial)
    layer_1_1 = Conv2D(growth_rate, (3, 3), padding='same', activation='relu')(batch_norm_1_1)
    conv_1_layer_1_1 = concatenate([batch_norm_1_1, layer_1_1])
    batch_norm_1_2 = BatchNormalization()(conv_1_layer_1_1)
    layer_1_2 = Conv2D(growth_rate, (3, 3), padding='same', activation='relu')(batch_norm_1_2)
    dense_block_1 = concatenate([batch_norm_1_1, layer_1_1, layer_1_2])

    # Transition down 1: 1x1 conv + dropout + 2x2 average pooling.
    batch_norm_down_1 = BatchNormalization()(dense_block_1)
    conv_down_1 = Conv2D(trans_down_1_size, (1, 1), padding='same', activation='relu')(batch_norm_down_1)
    conv_down_1 = Dropout(dropout_rate)(conv_down_1)
    trans_down_1 = AveragePooling2D(pool_size=(2, 2))(conv_down_1)

    # Dense block 2 (2 conv layers) + transition down 2.
    batch_norm_2_1 = BatchNormalization()(trans_down_1)
    layer_2_1 = Conv2D(growth_rate, (3, 3), padding='same', activation='relu')(batch_norm_2_1)
    conv_2_layer_2_1 = concatenate([batch_norm_2_1, layer_2_1])
    batch_norm_2_2 = BatchNormalization()(conv_2_layer_2_1)
    layer_2_2 = Conv2D(growth_rate, (3, 3), padding='same', activation='relu')(batch_norm_2_2)
    dense_block_2 = concatenate([batch_norm_2_1, layer_2_1, layer_2_2])
    batch_norm_down_2 = BatchNormalization()(dense_block_2)
    conv_down_2 = Conv2D(trans_down_2_size, (1, 1), padding='same', activation='relu')(batch_norm_down_2)
    conv_down_2 = Dropout(dropout_rate)(conv_down_2)
    trans_down_2 = AveragePooling2D(pool_size=(2, 2))(conv_down_2)

    # Dense block 3 (3 conv layers) + transition down 3.
    batch_norm_3_1 = BatchNormalization()(trans_down_2)
    layer_3_1 = Conv2D(growth_rate, (3, 3), padding='same', activation='relu')(batch_norm_3_1)
    conv_3_layer_3_1 = concatenate([batch_norm_3_1, layer_3_1])
    batch_norm_3_2 = BatchNormalization()(conv_3_layer_3_1)
    layer_3_2 = Conv2D(growth_rate, (3, 3), padding='same', activation='relu')(batch_norm_3_2)
    conv_3_layer_3_2 = concatenate([batch_norm_3_1, layer_3_1, layer_3_2])
    batch_norm_3_3 = BatchNormalization()(conv_3_layer_3_2)
    layer_3_3 = Conv2D(growth_rate, (3, 3), padding='same', activation='relu')(batch_norm_3_3)
    dense_block_3 = concatenate([batch_norm_3_1, layer_3_1, layer_3_2, layer_3_3])
    batch_norm_down_3 = BatchNormalization()(dense_block_3)
    conv_down_3 = Conv2D(trans_down_3_size, (1, 1), padding='same', activation='relu')(batch_norm_down_3)
    conv_down_3 = Dropout(dropout_rate)(conv_down_3)
    trans_down_3 = AveragePooling2D(pool_size=(2, 2))(conv_down_3)

    # Dense block 4 (3 conv layers) + transition down 4.
    batch_norm_4_1 = BatchNormalization()(trans_down_3)
    layer_4_1 = Conv2D(growth_rate, (3, 3), padding='same', activation='relu')(batch_norm_4_1)
    conv_4_layer_4_1 = concatenate([batch_norm_4_1, layer_4_1])
    batch_norm_4_2 = BatchNormalization()(conv_4_layer_4_1)
    layer_4_2 = Conv2D(growth_rate, (3, 3), padding='same', activation='relu')(batch_norm_4_2)
    conv_4_layer_4_2 = concatenate([batch_norm_4_1, layer_4_1, layer_4_2])
    batch_norm_4_3 = BatchNormalization()(conv_4_layer_4_2)
    layer_4_3 = Conv2D(growth_rate, (3, 3), padding='same', activation='relu')(batch_norm_4_3)
    dense_block_4 = concatenate([batch_norm_4_1, layer_4_1, layer_4_2, layer_4_3])
    batch_norm_down_4 = BatchNormalization()(dense_block_4)
    conv_down_4 = Conv2D(trans_down_4_size, (1, 1), padding='same', activation='relu')(batch_norm_down_4)
    conv_down_4 = Dropout(dropout_rate)(conv_down_4)
    trans_down_4 = AveragePooling2D(pool_size=(2, 2))(conv_down_4)

    # Dense block 5 (3 conv layers) at the lowest resolution.
    batch_norm_5_1 = BatchNormalization()(trans_down_4)
    layer_5_1 = Conv2D(growth_rate, (3, 3), padding='same', activation='relu')(batch_norm_5_1)
    conv_5_layer_5_1 = concatenate([batch_norm_5_1, layer_5_1])
    batch_norm_5_2 = BatchNormalization()(conv_5_layer_5_1)
    layer_5_2 = Conv2D(growth_rate, (3, 3), padding='same', activation='relu')(batch_norm_5_2)
    conv_5_layer_5_2 = concatenate([batch_norm_5_1, layer_5_1, layer_5_2])
    batch_norm_5_3 = BatchNormalization()(conv_5_layer_5_2)
    layer_5_3 = Conv2D(growth_rate, (3, 3), padding='same', activation='relu')(batch_norm_5_3)
    dense_block_5 = concatenate([batch_norm_5_1, layer_5_1, layer_5_2, layer_5_3])

    # Decoder stage 1: upsample block 5, derive a sigmoid spatial-attention map
    # from it, weight the skip connection (dense_block_4), then fuse.
    deconv_block_5 = Conv2DTranspose(block_5_size, (2, 2), strides=(2, 2), padding='same',
                                     activation='relu')(dense_block_5)
    sigmoid_block_5 = Conv2D(block_4_size, (1, 1), padding='same', activation='sigmoid')(deconv_block_5)
    weighted_block_4 = Multiply()([dense_block_4, sigmoid_block_5])
    spat_attn_fusion_1 = concatenate([deconv_block_5, weighted_block_4])

    # Dense block 6 (3 conv layers) + bottleneck 1 + upsample.
    batch_norm_6_1 = BatchNormalization()(spat_attn_fusion_1)
    layer_6_1 = Conv2D(growth_rate, (3, 3), padding='same', activation='relu')(batch_norm_6_1)
    conv_6_layer_6_1 = concatenate([batch_norm_6_1, layer_6_1])
    batch_norm_6_2 = BatchNormalization()(conv_6_layer_6_1)
    layer_6_2 = Conv2D(growth_rate, (3, 3), padding='same', activation='relu')(batch_norm_6_2)
    conv_6_layer_6_2 = concatenate([batch_norm_6_1, layer_6_1, layer_6_2])
    batch_norm_6_3 = BatchNormalization()(conv_6_layer_6_2)
    layer_6_3 = Conv2D(growth_rate, (3, 3), padding='same', activation='relu')(batch_norm_6_3)
    dense_block_6 = concatenate([batch_norm_6_1, layer_6_1, layer_6_2, layer_6_3])
    bottleneck_1 = Conv2D(bottleneck_1_2_size, (1, 1), padding='same', activation='relu')(dense_block_6)
    bottleneck_1 = Dropout(dropout_rate)(bottleneck_1)
    deconv_bottleneck_1 = Conv2DTranspose(bottleneck_1_2_size, (2, 2), strides=(2, 2), padding='same',
                                          activation='relu')(bottleneck_1)
    # Decoder stage 2: spatial attention over dense_block_3.
    sigmoid_bottleneck_1 = Conv2D(block_3_size, (1, 1), padding='same', activation='sigmoid')(deconv_bottleneck_1)
    weighted_block_3 = Multiply()([dense_block_3, sigmoid_bottleneck_1])
    spat_attn_fusion_2 = concatenate([deconv_bottleneck_1, weighted_block_3])

    # Dense block 7 (3 conv layers) + bottleneck 2 + upsample.
    batch_norm_7_1 = BatchNormalization()(spat_attn_fusion_2)
    layer_7_1 = Conv2D(growth_rate, (3, 3), padding='same', activation='relu')(batch_norm_7_1)
    conv_7_layer_7_1 = concatenate([batch_norm_7_1, layer_7_1])
    batch_norm_7_2 = BatchNormalization()(conv_7_layer_7_1)
    layer_7_2 = Conv2D(growth_rate, (3, 3), padding='same', activation='relu')(batch_norm_7_2)
    conv_7_layer_7_2 = concatenate([batch_norm_7_1, layer_7_1, layer_7_2])
    batch_norm_7_3 = BatchNormalization()(conv_7_layer_7_2)
    layer_7_3 = Conv2D(growth_rate, (3, 3), padding='same', activation='relu')(batch_norm_7_3)
    dense_block_7 = concatenate([batch_norm_7_1, layer_7_1, layer_7_2, layer_7_3])
    bottleneck_2 = Conv2D(bottleneck_1_2_size, (1, 1), padding='same', activation='relu')(dense_block_7)
    bottleneck_2 = Dropout(dropout_rate)(bottleneck_2)
    deconv_bottleneck_2 = Conv2DTranspose(bottleneck_1_2_size, (2, 2), strides=(2, 2), padding='same',
                                          activation='relu')(bottleneck_2)
    # Decoder stage 3: spatial attention over dense_block_2.
    sigmoid_bottleneck_2 = Conv2D(block_2_size, (1, 1), padding='same', activation='sigmoid')(deconv_bottleneck_2)
    weighted_block_2 = Multiply()([dense_block_2, sigmoid_bottleneck_2])
    spat_attn_fusion_3 = concatenate([deconv_bottleneck_2, weighted_block_2])

    # Dense block 8 (3 conv layers) + bottleneck 3 + upsample.
    batch_norm_8_1 = BatchNormalization()(spat_attn_fusion_3)
    layer_8_1 = Conv2D(growth_rate, (3, 3), padding='same', activation='relu')(batch_norm_8_1)
    conv_8_layer_8_1 = concatenate([batch_norm_8_1, layer_8_1])
    batch_norm_8_2 = BatchNormalization()(conv_8_layer_8_1)
    layer_8_2 = Conv2D(growth_rate, (3, 3), padding='same', activation='relu')(batch_norm_8_2)
    conv_8_layer_8_2 = concatenate([batch_norm_8_1, layer_8_1, layer_8_2])
    batch_norm_8_3 = BatchNormalization()(conv_8_layer_8_2)
    layer_8_3 = Conv2D(growth_rate, (3, 3), padding='same', activation='relu')(batch_norm_8_3)
    dense_block_8 = concatenate([batch_norm_8_1, layer_8_1, layer_8_2, layer_8_3])
    bottleneck_3 = Conv2D(bottleneck_3_4_size, (1, 1), padding='same', activation='relu')(dense_block_8)
    bottleneck_3 = Dropout(dropout_rate)(bottleneck_3)
    deconv_bottleneck_3 = Conv2DTranspose(bottleneck_3_4_size, (2, 2), strides=(2, 2), padding='same',
                                          activation='relu')(bottleneck_3)
    # Decoder stage 4: spatial attention over dense_block_1.
    sigmoid_bottleneck_3 = Conv2D(block_1_size, (1, 1), padding='same', activation='sigmoid')(deconv_bottleneck_3)
    weighted_block_1 = Multiply()([dense_block_1, sigmoid_bottleneck_3])
    spat_attn_fusion_4 = concatenate([deconv_bottleneck_3, weighted_block_1])

    # Dense block 9 (2 conv layers) + bottleneck 4 at full resolution.
    batch_norm_9_1 = BatchNormalization()(spat_attn_fusion_4)
    layer_9_1 = Conv2D(growth_rate, (3, 3), padding='same', activation='relu')(batch_norm_9_1)
    conv_9_layer_9_1 = concatenate([batch_norm_9_1, layer_9_1])
    batch_norm_9_2 = BatchNormalization()(conv_9_layer_9_1)
    layer_9_2 = Conv2D(growth_rate, (3, 3), padding='same', activation='relu')(batch_norm_9_2)
    dense_block_9 = concatenate([batch_norm_9_1, layer_9_1, layer_9_2])
    bottleneck_4 = Conv2D(bottleneck_3_4_size, (1, 1), padding='same', activation='relu')(dense_block_9)
    bottleneck_4 = Dropout(dropout_rate)(bottleneck_4)

    # Per-pixel binary (building / not building) prediction.
    pred_layer = Conv2D(1, (1, 1), padding='same', activation='sigmoid')(bottleneck_4)

    dan_model = Model(inputs=img_input, outputs=pred_layer)
    dan_model.compile(loss='binary_crossentropy', optimizer=Adam(lr=l_r), metrics=['binary_crossentropy'])

    return dan_model
# CNN with two spatial-attention blocks.
# NOTE: fragment -- truncated after the second attention block's Multiply.
img_inputs = Input(shape=input_dim)
conv1 = Conv2D(1, (1, 1), padding='same', activation='relu')(img_inputs)  #1
conv = Conv2D(128, (4, 4), padding='same', activation='relu')(conv1)  #2
conv = BatchNormalization()(conv)
#Attention 1
# Collapse channels to a single map, then apply a softmax along each spatial
# axis in turn.  Dense acts on the last axis, so each Permute rotates the
# target axis into the last position (and the final Permute restores order).
y = Conv2D(1, (1, 1))(conv)  # 32x32x1 ?
y = Permute((3, 2, 1))(y)
y = Dense(32, activation='softmax')(y)
y = Permute((1, 3, 2))(y)
y = Dense(32, activation='softmax')(y)
y = Permute((1, 3, 2))(y)
#now permute back
y = Permute((3, 2, 1))(y)
#end attention
mult = Multiply()([conv, y])
pooled = MaxPooling2D(pool_size=(2, 2))(mult)
pooled = Dropout(0.2)(pooled)
conv = Conv2D(128, (3, 3), padding='same', activation='relu')(pooled)
conv = BatchNormalization()(conv)
#Attention 2
# Same attention pattern at the pooled (half) resolution, hence size 16.
y = Conv2D(1, (1, 1))(conv)  # 32x32x1 ?
y = Permute((3, 2, 1))(y)
y = Dense(16, activation='softmax')(y)
y = Permute((1, 3, 2))(y)
y = Dense(16, activation='softmax')(y)
y = Permute((1, 3, 2))(y)
#now permute back
y = Permute((3, 2, 1))(y)
#end attention
mult = Multiply()([conv, y])
def encoder_decoder(data):
    """Build, train and return a bidirectional-LSTM encoder/decoder model.

    data: mapping with "article" (inputs) and "summaries" (targets).
    Returns (model, encoder_model_inf, decoder_model_inf, history).
    Relies on module-level en_shape, de_shape, hidden_units, learning_rate,
    clip_norm, batch_size, epochs and tts (train/test split).
    """
    print('Encoder_Decoder LSTM...')
    """__encoder___"""
    encoder_inputs = Input(shape=en_shape)
    # NOTE(review): dropout_U/dropout_W are Keras 1 argument names (Keras 2
    # renamed them recurrent_dropout/dropout) -- confirm the Keras version.
    encoder_LSTM = LSTM(hidden_units, dropout_U=0.2, dropout_W=0.2,
                        return_sequences=True, return_state=True)
    encoder_LSTM_rev = LSTM(hidden_units, return_state=True, return_sequences=True,
                            dropout_U=0.05, dropout_W=0.05, go_backwards=True)
    # Forward and backward passes over the same input; outputs and final
    # states are merged by elementwise addition.
    encoder_outputs, state_h, state_c = encoder_LSTM(encoder_inputs)
    encoder_outputsR, state_hR, state_cR = encoder_LSTM_rev(encoder_inputs)
    state_hfinal = Add()([state_h, state_hR])
    state_cfinal = Add()([state_c, state_cR])
    encoder_outputs_final = Add()([encoder_outputs, encoder_outputsR])
    encoder_states = [state_hfinal, state_cfinal]
    """____decoder___"""
    decoder_inputs = Input(shape=(None, de_shape[1]))
    decoder_LSTM = LSTM(hidden_units, return_sequences=True,
                        dropout_U=0.2, dropout_W=0.2, return_state=True)
    decoder_outputs, _, _ = decoder_LSTM(decoder_inputs, initial_state=encoder_states)
    #Pull out XGBoost, (I mean attention)
    # Scalar score per encoder timestep, flattened and combined with the
    # decoder outputs.  NOTE(review): multiplying a flattened 2-D score with a
    # 3-D decoder output relies on broadcasting -- verify shapes match intent.
    attention = TimeDistributed(Dense(1, activation='tanh'))(encoder_outputs_final)
    attention = Flatten()(attention)
    attention = Multiply()([decoder_outputs, attention])
    attention = Activation('softmax')(attention)
    attention = Permute([2, 1])(attention)
    decoder_dense = Dense(de_shape[1], activation='softmax')
    decoder_outputs = decoder_dense(attention)
    model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_outputs)
    print(model.summary())
    rmsprop = RMSprop(lr=learning_rate, clipnorm=clip_norm)
    model.compile(loss='categorical_crossentropy', optimizer=rmsprop, metrics=['accuracy'])
    x_train, x_test, y_train, y_test = tts(data["article"], data["summaries"], test_size=0.20)
    # Teacher forcing: the decoder input is the target summary itself.
    history = model.fit(x=[x_train, y_train], y=y_train,
                        batch_size=batch_size, epochs=epochs, verbose=1,
                        validation_data=([x_test, y_test], y_test))
    print(model.summary())
    """_________________inference mode__________________"""
    encoder_model_inf = Model(encoder_inputs, encoder_states)
    # NOTE(review): state inputs are sized en_shape[0], but LSTM states have
    # size hidden_units -- confirm en_shape[0] == hidden_units.
    decoder_state_input_H = Input(shape=(en_shape[0], ))
    decoder_state_input_C = Input(shape=(en_shape[0], ))
    decoder_state_inputs = [decoder_state_input_H, decoder_state_input_C]
    decoder_outputs, decoder_state_h, decoder_state_c = decoder_LSTM(
        decoder_inputs, initial_state=decoder_state_inputs)
    decoder_states = [decoder_state_h, decoder_state_c]
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder_model_inf = Model([decoder_inputs] + decoder_state_inputs,
                              [decoder_outputs] + decoder_states)
    scores = model.evaluate([x_test, y_test], y_test, verbose=1)
    print('LSTM test scores:', scores)
    print('\007')
    print(model.summary())
    return model, encoder_model_inf, decoder_model_inf, history
def create_siamese_lstm_dssm_mdoel(embedding_matrix, embedding_word_matrix, model_param,
                                   embedding_size=300, max_sentence_length=20, max_word_length=25):
    """Build a siamese LSTM/DSSM sentence-matching model.

    Two branches (sentence-level and word-level) each combine shared
    BiLSTM + attention representations; interaction features (multiply,
    abs-difference, maximum) plus cosine/Manhattan distances and a small
    match-matrix CNN feed a final sigmoid similarity score.
    Relies on module-level AttentionLayer, ConsDist, ManDist, fbeta_score,
    precision and recall.
    """
    # Part 1: sentence-level branch.
    # Step 1: define the inputs of the composite model.
    num_conv2d_layers = 1
    filters_2d = [6, 12]
    kernel_size_2d = [[3, 3], [3, 3]]
    mpool_size_2d = [[2, 2], [2, 2]]
    left_input = Input(shape=(max_sentence_length, ), dtype='int32')
    right_input = Input(shape=(max_sentence_length, ), dtype='int32')
    # Define the shared layers.
    embedding_layer1 = Embedding(input_dim=len(embedding_matrix, ), output_dim=embedding_size,
                                 weights=[embedding_matrix], trainable=True,
                                 input_length=max_sentence_length)
    att_layer1 = AttentionLayer(20)
    bi_lstm_layer = Bidirectional(LSTM(model_param['lstm_units']))
    lstm_layer1 = LSTM(model_param['lstm_units'], return_sequences=True)
    lstm_layer2 = LSTM(model_param['lstm_units'])
    # Apply the shared embedding to both inputs.
    s1 = embedding_layer1(left_input)
    s2 = embedding_layer1(right_input)
    # Bidirectional LSTM on top of the embeddings.
    s1_bi = bi_lstm_layer(s1)
    s2_bi = bi_lstm_layer(s2)
    # Separately, a stacked two-layer LSTM on top of the embeddings.
    s1_lstm_lstm = lstm_layer2(lstm_layer1(s1))
    s2_lstm_lstm = lstm_layer2(lstm_layer1(s2))
    s1_lstm = lstm_layer1(s1)
    s2_lstm = lstm_layer1(s2)
    # Pairwise match matrix between the two sequences feeds a small 2-D CNN.
    cnn_input_layer = dot([s1_lstm, s2_lstm], axes=-1)
    cnn_input_layer_dot = Reshape((20, 20, -1))(cnn_input_layer)
    layer_conv1 = Conv2D(filters=8, kernel_size=3, padding='same', activation='relu')(cnn_input_layer_dot)
    z = MaxPooling2D(pool_size=(2, 2))(layer_conv1)
    for i in range(num_conv2d_layers):
        z = Conv2D(filters=filters_2d[i], kernel_size=kernel_size_2d[i], padding='same', activation='relu')(z)
        z = MaxPooling2D(pool_size=(mpool_size_2d[i][0], mpool_size_2d[i][1]))(z)
    pool1_flat = Flatten()(z)
    # # print pool1_flat
    pool1_flat_drop = Dropout(rate=0.1)(pool1_flat)
    ccn1 = Dense(32, activation='relu')(pool1_flat_drop)
    ccn2 = Dense(16, activation='relu')(ccn1)
    # Attention layer on top of the embeddings.
    s1_att = att_layer1(s1)
    s2_att = att_layer1(s2)
    # Concatenate the attention and BiLSTM representations.
    s1_last = Concatenate(axis=1)([s1_att, s1_bi])
    s2_last = Concatenate(axis=1)([s2_att, s2_bi])
    cos_layer = ConsDist()([s1_last, s2_last])
    man_layer = ManDist()([s1_last, s2_last])
    # Part 2: word-level branch.
    left_w_input = Input(shape=(max_word_length, ), dtype='int32')
    right_w_input = Input(shape=(max_word_length, ), dtype='int32')
    # Define the shared word-level layers.
    embedding_layer2 = Embedding(input_dim=len(embedding_word_matrix, ), output_dim=embedding_size,
                                 weights=[embedding_word_matrix], trainable=True,
                                 input_length=max_word_length)
    lstm_word_bi_layer = Bidirectional(LSTM(6))
    att_layer2 = AttentionLayer(25)
    s1_words = embedding_layer2(left_w_input)
    s2_words = embedding_layer2(right_w_input)
    # Disabled word-level match-matrix CNN (kept for reference):
    # s1_word_lstm = lstm_layer1(s1_words)
    # s2_word_lstm = lstm_layer1(s2_words)
    #
    # cnn_input_layer1 = dot([s1_word_lstm, s2_word_lstm], axes=-1)
    # cnn_input_layer_dot1 = Reshape((25, 25, -1))(cnn_input_layer1)
    # layer_conv11 = Conv2D(filters=8, kernel_size=3, padding='same', activation='relu')(cnn_input_layer_dot1)
    # z1 = MaxPooling2D(pool_size=(2, 2))(layer_conv11)
    #
    # for i in range(num_conv2d_layers):
    #     z1 = Conv2D(filters=filters_2d[i], kernel_size=kernel_size_2d[i], padding='same', activation='relu')(z1)
    #     z1 = MaxPooling2D(pool_size=(mpool_size_2d[i][0], mpool_size_2d[i][1]))(z1)
    #
    # pool1_flat1 = Flatten()(z1)
    # # print pool1_flat
    # pool1_flat_drop1 = Dropout(rate=0.1)(pool1_flat1)
    # mlp11 = Dense(32, activation='relu')(pool1_flat_drop1)
    # mlp21 = Dense(16, activation='relu')(mlp11)
    s1_words_bi = lstm_word_bi_layer(s1_words)
    s2_words_bi = lstm_word_bi_layer(s2_words)
    s1_words_att = att_layer2(s1_words)
    s2_words_att = att_layer2(s2_words)
    s1_words_last = Concatenate(axis=1)([s1_words_att, s1_words_bi])
    s2_words_last = Concatenate(axis=1)([s2_words_att, s2_words_bi])
    cos_layer1 = ConsDist()([s1_words_last, s2_words_last])
    man_layer1 = ManDist()([s1_words_last, s2_words_last])
    # Part 3: combine the two branches with elementwise interaction features.
    s1_s2_mul = Multiply()([s1_last, s2_last])
    s1_s2_sub = Lambda(lambda x: K.abs(x))(Subtract()([s1_last, s2_last]))
    s1_s2_maxium = Maximum()(
        [Multiply()([s1_last, s1_last]),
         Multiply()([s2_last, s2_last])])
    s1_s2_sub1 = Lambda(lambda x: K.abs(x))(
        Subtract()([s1_lstm_lstm, s2_lstm_lstm]))
    s1_words_s2_words_mul = Multiply()([s1_words_last, s2_words_last])
    s1_words_s2_words_sub = Lambda(lambda x: K.abs(x))(
        Subtract()([s1_words_last, s2_words_last]))
    s1_words_s2_words_maxium = Maximum()([
        Multiply()([s1_words_last, s1_words_last]),
        Multiply()([s2_words_last, s2_words_last])
    ])
    last_list_layer = Concatenate(axis=1)([
        s1_s2_mul, s1_s2_sub, s1_s2_sub1, s1_s2_maxium,
        s1_words_s2_words_mul, s1_words_s2_words_sub, s1_words_s2_words_maxium
    ])
    last_list_layer = Dropout(0.05)(last_list_layer)
    # Dense layers.
    dense_layer1 = Dense(32, activation='relu')(last_list_layer)
    dense_layer2 = Dense(48, activation='sigmoid')(last_list_layer)
    output_layer = Concatenate(axis=1)([
        dense_layer1, dense_layer2, cos_layer, man_layer, cos_layer1, man_layer1, ccn2
    ])
    # Step 4: define the output layer.
    output_layer = Dense(1, activation='sigmoid')(output_layer)
    model = Model(
        inputs=[left_input, right_input, left_w_input, right_w_input],
        outputs=[output_layer],
        name="simaese_lstm_attention")
    model.compile(
        # categorical_crossentropy,contrastive_loss,binary_crossentropy
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=["accuracy", fbeta_score, precision, recall])
    return model
# NOTE: fragment -- begins mid-call: the Dense(...) opening for dense_1 and the
# SEQ input / dropout_1 layers are defined above this view.
                                     kernel_initializer='glorot_uniform')(dropout_1)
dropout_2 = Dropout(0.7)(dense_1)
dense_2 = Dense(units=75, activation="relu", kernel_initializer='glorot_uniform')(dropout_2)
dropout_3 = Dropout(0.3)(dense_2)
dense_3 = Dense(units=60, activation="relu", kernel_initializer='glorot_uniform')(dropout_3)
#model for epigenetics feature
NU = Input(shape=(1, ))
dense1_nu = Dense(units=60, activation="relu", kernel_initializer='glorot_uniform')(NU)
# Gate the sequence branch multiplicatively by the epigenetics branch, then
# regress a single scalar.
mult = Multiply()([dense_3, dense1_nu])
out = Dense(units=1, activation="linear")(mult)
#dense_out = Dense(units=1, activation="linear")(dense_3)
model = Model(inputs=[SEQ, NU], outputs=out)
model.summary()
#adam = SGD(lr=0.01, beta_1=0.9, beta_2=0.999, decay=0.01)
#adam = SGD(lr=0.01, momentum=0.99, decay=0.01, nesterov=False)
adam = Adam(lr=0.001)
model.compile(loss='mean_squared_error', optimizer=adam)
checkpointer = ModelCheckpoint(filepath="cas9.hdf5", verbose=1, monitor='val_loss', save_best_only=True)
earlystopper = EarlyStopping(monitor='val_loss', patience=400, verbose=1)
## A if model=='A': controller_input = Input(shape=(14,12),name='New_Input') MEMORY = Lambda(lambda x: K.zeros(shape=(1,120,40)),name='Memory_0')(controller_input) usage_weights = Lambda(lambda x: K.zeros(shape=(1,1,120)),name='Usage_Weights_0')(controller_input) read_weights = Lambda(lambda x: K.zeros(shape=(1,14,120)),name='Read_Weights_0')(controller_input) controller = LSTM(units=200, activation='tanh',stateful=False, return_sequences=True,name='LSTM_CONTROLLER')(controller_input) write_keys = Dense(40, activation='tanh',name='Write_Keys')(controller) read_keys = Dense(40, activation='tanh',name='Read_Keys')(controller) omegas = Dense(1, activation='sigmoid',name='Omegas')(controller) least_usage = Lambda(lambda x: K.one_hot(indices=K.argmax(-x),num_classes=120),name='Least_Usage')(usage_weights) omegas_tiled = Lambda(lambda x: K.tile(x,(1,1,120)))(omegas) compl_omegas = Lambda(lambda o: K.ones(shape=(14,120)) - o)(omegas_tiled) rd_part = Multiply()([omegas_tiled, read_weights]) us_part = Multiply()([compl_omegas, least_usage]) write_weights = Add(name='Write_Weights')([rd_part,us_part]) writing = Dot(axes=[1,1])([write_weights, write_keys]) MEMORY = Add(name='Memory')([MEMORY, writing]) cos_sim = Dot(axes=[2,2], normalize=True,name='Cosine_Similarity')([read_keys,MEMORY]) read_weights = Lambda(lambda x: softmax(x,axis=1),name='Read_Weights')(cos_sim) write_weights_summed = Lambda(lambda x: K.sum(x,axis=1,keepdims=True))(write_weights) read_weights_summed = Lambda(lambda x: K.sum(x,axis=1,keepdims=True))(read_weights) decay_usage = Lambda(lambda x: K.constant(0.99, shape=(1,120))*x)(usage_weights) usage_weights = Add(name='Usage_Weights')([decay_usage, read_weights_summed, write_weights_summed]) retrieved_memory = Dot(axes=[2,1],name='Retrieved_Memories')([read_weights, MEMORY]) controller_output = concatenate([controller, retrieved_memory],name='Controller_Output') main_output = Dense(6,activation='sigmoid',name='Final_Output')(controller_output) M = 
Model(inputs=controller_input, outputs=[main_output])
# model for seq DeepCpf1_Input_SEQ = Input(shape=(40, 4)) DeepCpf1_C1 = Convolution1D(80, 5, activation='relu')(DeepCpf1_Input_SEQ) DeepCpf1_P1 = AveragePooling1D(2)(DeepCpf1_C1) DeepCpf1_F = Flatten()(DeepCpf1_P1) DeepCpf1_DO1 = Dropout(0.3)(DeepCpf1_F) DeepCpf1_D1 = Dense(80, activation='relu')(DeepCpf1_DO1) DeepCpf1_DO2 = Dropout(0.3)(DeepCpf1_D1) DeepCpf1_D2 = Dense(40, activation='relu')(DeepCpf1_DO2) DeepCpf1_DO3 = Dropout(0.3)(DeepCpf1_D2) DeepCpf1_D3_SEQ = Dense(40, activation='relu')(DeepCpf1_DO3) DeepCpf1_Input_CA = Input(shape=(1, )) DeepCpf1_D3_CA = Dense(40, activation='relu')(DeepCpf1_Input_CA) DeepCpf1_M = Multiply()([DeepCpf1_D3_SEQ, DeepCpf1_D3_CA]) DeepCpf1_DO4 = Dropout(0.3)(DeepCpf1_M) DeepCpf1_Output = Dense(1, activation='linear')(DeepCpf1_DO4) DeepCpf1 = Model(inputs=[DeepCpf1_Input_SEQ, DeepCpf1_Input_CA], outputs=[DeepCpf1_Output]) DeepCpf1.summary() adam = Adam(lr=0.001) DeepCpf1.compile(loss='mean_squared_error', optimizer=adam) checkpointer = ModelCheckpoint(filepath="cas9.hdf5", verbose=1, monitor='val_loss', save_best_only=True) earlystopper = EarlyStopping(monitor='val_loss', patience=20, verbose=1) DeepCpf1.fit([train_x, train_nu],
    # NOTE: fragment (Python 2) -- begins inside the first branch of an
    # if model_choice == 1 chain whose opening is above this view.
    # Word embeddings (WE) only.
    print 'WE only'
    gru_kata = Bidirectional(GRU(EMBEDDING_DIM, return_sequences=True, dropout=dropout,
                                 recurrent_dropout=rec_dropout), merge_mode=merge_m,
                             weights=None)(
        embedded_sequences)
elif model_choice == 2:
    # Character embeddings (CE) only.
    print 'CE only'
    gru_kata = Bidirectional(GRU(EMBEDDING_DIM, return_sequences=True, dropout=dropout,
                                 recurrent_dropout=rec_dropout), merge_mode=merge_m,
                             weights=None)(
        rtwo)
else:
    # combine = input('Enter 1 for Add, 2 for Subtract, 3 for Multiply, 4 for Average, 5 for Maximum: ')
    combine = sys.argv[5]
    print 'Merge layer:', combine
    print 'Both WE & CE'
    # NOTE(review): combine comes from sys.argv and is therefore a str, but it
    # is compared against ints below, so every value falls through to Add() --
    # confirm whether the comparisons should be against '2'..'5'.
    if combine == 2:
        merge = Subtract()([embedded_sequences, rtwo])
    elif combine == 3:
        merge = Multiply()([embedded_sequences, rtwo])
    elif combine == 4:
        merge = Average()([embedded_sequences, rtwo])
    elif combine == 5:
        merge = Maximum()([embedded_sequences, rtwo])
    else:
        merge = Add()([embedded_sequences, rtwo])
    gru_kata = Bidirectional(GRU(EMBEDDING_DIM, return_sequences=True, dropout=dropout,
                                 recurrent_dropout=rec_dropout), merge_mode=merge_m,
                             weights=None)(
        merge)
crf = CRF(len(label.index) + 1, learn_mode='marginal')(gru_kata)
preds = Dense(len(label.index) + 1, activation='softmax')(gru_kata)
print "Model Choice:"
# model_choice = input('Enter 1 for CRF or 2 for Dense layer: ')
def build_LSTM_model(trainData, trainBatches, testData, testBatches, windowSize, class_count, numCalls, batch_size):
    """Build and train a gated-CNN classifier over windows of API-call indices.

    Args:
        trainData: generator yielding training batches for fit_generator.
        trainBatches: number of training steps (batches) per epoch.
        testData: generator yielding validation batches.
        testBatches: number of validation steps per epoch.
        windowSize: length of each API-call index window.
        class_count: number of output classes.
        numCalls: size of the API-call vocabulary (0 is reserved for padding).
        batch_size: unused here; batching is handled by the generators.

    Returns:
        (model, hist): the trained Keras model and its training History.
    """
    embedding_size = 256

    # Stop training once the monitored accuracy stops improving.
    # https://keras.io/callbacks/#earlystopping
    early_stop = cb.EarlyStopping(monitor='sparse_categorical_accuracy', min_delta=0.0001, patience=3)

    # API call numbers are arbitrary identifiers with no metric meaning (call 2
    # is not "closer" to call 3 than to call 400), so an embedding layer learns
    # a dense representation instead of feeding raw indices to the network.
    # https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html
    api_count = numCalls + 1  # +1 because 0 is our padding number
    inp = Input(shape=(windowSize, ))
    emb = Embedding(input_dim=api_count, output_dim=embedding_size, input_length=windowSize)(inp)

    # Gated convolution (from the MalConv paper): a relu feature map modulated
    # elementwise by a sigmoid attention map over the same input.
    filt = Conv1D(filters=64, kernel_size=3, strides=1, use_bias=True, activation='relu', padding='valid')(emb)
    attn = Conv1D(filters=64, kernel_size=3, strides=1, use_bias=True, activation='sigmoid', padding='valid')(emb)
    gated = Multiply()([filt, attn])
    drop = Dropout(0.5)(gated)
    feat = GlobalMaxPooling1D()(drop)
    dense = Dense(128, activation='relu')(feat)

    # BUG FIX: the output layer previously used 'sigmoid', but the model is
    # trained with sparse_categorical_crossentropy, which expects a probability
    # distribution over the classes -- i.e. a softmax output.
    outp = Dense(class_count, activation='softmax')(dense)

    model = Model(inp, outp)

    # https://keras.io/optimizers/
    opt = optimizers.RMSprop(lr=0.01, decay=0.001)

    # sparse_categorical_* variants are used because the labels are integer
    # class ids rather than one-hot vectors:
    # https://stackoverflow.com/questions/44477489/keras-difference-between-categorical-accuracy-and-sparse-categorical-accuracy
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=opt,
        metrics=['sparse_categorical_accuracy'])

    # https://keras.io/models/model/#fit_generator
    hist = model.fit_generator(
        trainData,
        # Python threading is GIL-bound, so use real processes for the
        # generator workers: https://docs.python.org/3/glossary.html#term-global-interpreter-lock
        use_multiprocessing=True,
        # Steps per epoch lets us train on a dataset too large for memory.
        steps_per_epoch=trainBatches,
        epochs=100,
        # Validation data (will not be trained on).
        validation_data=testData,
        validation_steps=testBatches,
        # Do not shuffle batches.
        shuffle=False,
        callbacks=[early_stop])

    return model, hist
# MetricGAN assembly.  NOTE: fragment -- Score, _input, de_model, output_path
# and creatdir are defined elsewhere in the file.
# The discriminator regresses a quality score (mse) from spectrogram pairs.
Discriminator = Model(outputs=Score, inputs=_input)
Discriminator.trainable = True
Discriminator.compile(loss='mse', optimizer='adam')

#### Combine the two networks to become MetricGAN
# Freeze the discriminator while training the enhancement (generator) path.
Discriminator.trainable = False
Clean_reference = Input(shape=(257, None, 1))
Noisy_LP = Input(shape=(257, None, 1))
Min_mask = Input(shape=(257, None, 1))
Reshape_de_model_output = Reshape((257, -1, 1))(de_model.output)
# Clamp the predicted mask from below by Min_mask, then apply it to the noisy
# spectrogram to produce the enhanced output.
Mask = Maximum()([Reshape_de_model_output, Min_mask])
Enhanced = Multiply()([Mask, Noisy_LP])
Discriminator_input = Concatenate(axis=-1)([Enhanced, Clean_reference])
# Here the input of Discriminator is (Noisy, Clean) pair, so a clean reference is needed!!
Predicted_score = Discriminator(Discriminator_input)
MetricGAN = Model(inputs=[de_model.input, Noisy_LP, Clean_reference, Min_mask], outputs=Predicted_score)
MetricGAN.compile(loss='mse', optimizer='adam')
######## Model define end #########

# Bookkeeping lists for evaluation metrics collected during training.
Test_PESQ = []
Test_STOI = []
Test_Predicted_STOI_list = []
Train_Predicted_STOI_list = []
Previous_Discriminator_training_list = []
# Reset the output directory for this run.
shutil.rmtree(output_path)
creatdir(output_path)
def get_layer(inp_a, inp_object_s, inp_type, inp_predicate, inp_tag):
    """Build the pairwise text-matching head of the model.

    Embeds and encodes the five symbolic inputs (question `inp_a`, object
    `inp_object_s`, type features `inp_type`, predicate `inp_predicate`,
    tag `inp_tag`), pools them, fuses them with element-wise interactions,
    and returns a 2-unit sigmoid output tensor.

    Relies on module-level globals: `embedding_path`, `embed_size`, `tk`,
    `tk_predicate` (tokenizers) — TODO confirm these are defined at import time.
    """

    def load_embedding(toka, max_features):
        # Load pre-trained word vectors from `embedding_path` and build an
        # embedding matrix aligned with the tokenizer's word index.
        def get_coefs(token, *arr):
            return token, np.asarray(arr, dtype='float32')

        embedding_index = dict(get_coefs(*o.strip().split(" ")) for o in open(embedding_path, encoding="utf-8"))
        word_index = toka.word_index
        nub_words = min(max_features, len(word_index))
        # Row 0 is reserved for padding; tokenizer indices start at 1.
        embedding_matrix_ = np.zeros((nub_words + 1, embed_size))
        for word, i in word_index.items():
            if i >= max_features:
                continue
            embedding_vector = embedding_index.get(word)
            if embedding_vector is not None:
                embedding_matrix_[i] = embedding_vector
        return embedding_matrix_, nub_words

    def get_pooling(x):
        # Average + max pooling over the sequence axis.
        avg_pool_x = GlobalAveragePooling1D()(x)
        max_pool_x = GlobalMaxPooling1D()(x)
        return avg_pool_x, max_pool_x

    embedding_matrix, nb_words = load_embedding(tk, 10_0000)  # 100,000 word vocab cap
    predicate_embedding_matrix, predicate_nb_words = load_embedding(tk_predicate, 41810)

    # Three separate (frozen) embedding layers sharing the same weight matrix.
    embed_layer_a = Embedding(nb_words + 1, embed_size, weights=[embedding_matrix], trainable=False)
    embed_layer_b = Embedding(nb_words + 1, embed_size, weights=[embedding_matrix], trainable=False)
    embed_layer_c = Embedding(nb_words + 1, embed_size, weights=[embedding_matrix], trainable=False)
    embed_predicate_layer = Embedding(predicate_nb_words + 1, embed_size, weights=[predicate_embedding_matrix], trainable=False)

    x_a = embed_layer_a(inp_a)
    x_b = embed_layer_b(inp_object_s)
    x_tag = embed_layer_c(inp_tag)
    x_predicate = embed_predicate_layer(inp_predicate)

    x_a = SpatialDropout1D(0.3)(x_a)
    x_b = SpatialDropout1D(0.3)(x_b)
    x_tag = SpatialDropout1D(0.3)(x_tag)
    x_predicate = SpatialDropout1D(0.3)(x_predicate)

    # BiLSTM encoders (CuDNN variants: GPU-only).
    xc_a = Bidirectional(CuDNNLSTM(32, return_sequences=True))(x_a)
    xc_b = Bidirectional(CuDNNLSTM(64, return_sequences=True))(x_b)
    x_predicate = Bidirectional(CuDNNLSTM(64, return_sequences=True))(x_predicate)
    x_tag = Bidirectional(CuDNNLSTM(64, return_sequences=True))(x_tag)

    # N-gram convolutions (kernel sizes 2 and 3) on each encoded sequence.
    xc_a_3 = Conv1D(32, kernel_size=3, padding='valid', kernel_initializer='he_uniform', activation="relu")(xc_a)
    xc_a_2 = Conv1D(32, kernel_size=2, padding='valid', kernel_initializer='he_uniform', activation="relu")(xc_a)
    # x_entity_3 = Conv1D(32, kernel_size=3, padding='valid', kernel_initializer='he_uniform', activation="relu")(x_entity)
    # x_entity_2 = Conv1D(32, kernel_size=2, padding='valid', kernel_initializer='he_uniform', activation="relu")(x_entity)
    xc_tag_3 = Conv1D(32, kernel_size=3, padding='valid', kernel_initializer='he_uniform', activation="relu")(x_tag)
    xc_tag_2 = Conv1D(32, kernel_size=2, padding='valid', kernel_initializer='he_uniform', activation="relu")(x_tag)
    xc_b_3 = Conv1D(32, kernel_size=3, padding='valid', kernel_initializer='he_uniform', activation="relu")(xc_b)
    xc_b_2 = Conv1D(32, kernel_size=2, padding='valid', kernel_initializer='he_uniform', activation="relu")(xc_b)

    # avg_pool_entity3, max_pool_entity3 = get_pooling(x_entity_3)
    # avg_pool_entity2, max_pool_entity2 = get_pooling(x_entity_2)
    avg_pool_a3, max_pool_a3 = get_pooling(xc_a_3)
    avg_pool_a2, max_pool_a2 = get_pooling(xc_a_2)
    avg_pool_predicate, max_pool_predicate = get_pooling(x_predicate)
    avg_pool_b3, max_pool_b3 = get_pooling(xc_b_3)
    avg_pool_b2, max_pool_b2 = get_pooling(xc_b_2)
    avg_pool_tag3, max_pool_tag3 = get_pooling(xc_tag_3)
    avg_pool_tag2, max_pool_tag2 = get_pooling(xc_tag_2)

    # Project each pooled branch to a common 32-dim space.
    x_tag = concatenate([avg_pool_tag3, max_pool_tag3, avg_pool_tag2, max_pool_tag2])
    x_tag = BatchNormalization()(x_tag)
    x_tag = Dropout(0.3)(Dense(32, activation='relu')(x_tag))
    x_predicate = concatenate([avg_pool_predicate, max_pool_predicate])
    x_predicate = BatchNormalization()(x_predicate)
    x_predicate = Dropout(0.3)(Dense(32, activation='relu')(x_predicate))
    x_a = concatenate([avg_pool_a3, max_pool_a3, avg_pool_a2, max_pool_a2])
    x_a = BatchNormalization()(x_a)
    x_a = Dropout(0.3)(Dense(32, activation='relu')(x_a))
    x_b = concatenate([avg_pool_b3, max_pool_b3, avg_pool_b2, max_pool_b2])
    # x_b = BatchNormalization()(x_b)
    # x_b = Dropout(0.3)(Dense(128, activation='relu')(x_b))
    x_b = BatchNormalization()(x_b)
    x_b = Dropout(0.3)(Dense(32, activation='relu')(x_b))

    # Evidence vector: object + predicate + tag branches fused together.
    x_e = concatenate([x_b, x_predicate, x_tag])
    x_e = BatchNormalization()(x_e)
    x_e = Dropout(0.1)(Dense(32, activation='relu')(x_e))

    # xm_b = Multiply()([x_a, x_b])
    # xm_b = BatchNormalization()(xm_b)
    # xm_b = Dropout(0.2)(Dense(32, activation='relu')(xm_b))
    #
    # xm_tag = Multiply()([x_a, x_tag])
    # xm_tag = BatchNormalization()(xm_tag)
    # xm_tag = Dropout(0.2)(Dense(32, activation='relu')(xm_tag))

    # Element-wise interaction between question and evidence.
    xm = Multiply()([x_a, x_e])
    xm = BatchNormalization()(xm)
    xm = Dropout(0.3)(Dense(32, activation='relu')(xm))
    # d1 = Dot(1)([x_a, x_e])
    # d2 = Dot(1)([x_a, x_e2])
    # xm = concatenate([xm_b, xm_e, xm_tag])
    # xm = BatchNormalization()(xm)
    # xm = Dropout(0.2)(Dense(128, activation='relu')(xm))
    # x = concatenate([x_tag, x_predicate, xm_tag, xm_b, x_a, x_b, inp_type])
    # x = concatenate([x_tag, x_predicate, x_b, inp_type])
    # x = BatchNormalization()(x)
    # x = Dropout(0.2)(Dense(128, activation='relu')(x))

    # Final classifier over [question, evidence, interaction, type features].
    x = concatenate([x_a, x_e, xm, inp_type])
    x = BatchNormalization()(x)
    x = Dropout(0.2)(Dense(128, activation='tanh')(x))
    x = BatchNormalization()(x)
    x = Dropout(0.2)(Dense(32, activation='tanh')(x))
    out = Dense(2, activation="sigmoid")(x)
    return out
def score_refine_module(input_feature_map, map_name=None):
    """Refine a score map with a learned locate (attention) map.

    Builds two heads on a shared 256-channel stem:
      * score head: four repetitions of parallel 1x1 / 3x3 conv branches whose
        sum feeds the next repetition, ending in a 5-channel ReLU score map;
      * locate head: a small conv stack ending in a 1-channel sigmoid map.
    Returns (refined_map, locate_map) where refined_map = score_map * locate_map.

    NOTE(review): `map_name=None` would crash on the string concatenations
    below — callers are expected to always pass a string; confirm.
    """
    with tf.variable_scope("score_refine_module" + map_name, reuse=tf.AUTO_REUSE):
        # Channel axis for BatchNormalization depends on the data format.
        if keras.backend.image_data_format() == 'channels_last':
            bn_axis = 3
        else:
            bn_axis = 1
        # Shared stem: 1x1 conv -> BN -> ReLU.
        x = Conv2D(256, (1, 1), kernel_regularizer=regularizers.l2(myModelConfig.weight_decay))(input_feature_map)
        x = BatchNormalization(axis=bn_axis, name=map_name + 'bn_SR_map')(x)
        x = Activation('relu')(x)

        # score head:
        # Repetition 1 — both branches read the stem output `x`.
        score_map_s1 = Conv2D(256, (1, 1), padding='same', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(myModelConfig.weight_decay))(x)
        score_map_s1 = BatchNormalization(axis=bn_axis, name=map_name + 'bn_s1_1')(score_map_s1)
        score_map_s1 = Activation('relu')(score_map_s1)
        score_map_s2 = Conv2D(256, (3, 3), padding='same', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(myModelConfig.weight_decay))(x)
        score_map_s2 = BatchNormalization(axis=bn_axis, name=map_name + 'bn_s2_1')(score_map_s2)
        score_map_s2 = Activation('relu')(score_map_s2)
        score_map_s1s2 = Add()([score_map_s1, score_map_s2])

        # Repetition 2 — both branches read the previous sum.
        score_map_s1 = Conv2D(256, (1, 1), padding='same', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(myModelConfig.weight_decay))(score_map_s1s2)
        score_map_s1 = BatchNormalization(axis=bn_axis, name=map_name + 'bn_s1_2')(score_map_s1)
        score_map_s1 = Activation('relu')(score_map_s1)
        score_map_s2 = Conv2D(256, (3, 3), padding='same', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(myModelConfig.weight_decay))(score_map_s1s2)
        score_map_s2 = BatchNormalization(axis=bn_axis, name=map_name + 'bn_s2_2')(score_map_s2)
        score_map_s2 = Activation('relu')(score_map_s2)
        score_map_s1s2 = Add()([score_map_s1, score_map_s2])

        # Repetition 3.
        score_map_s1 = Conv2D(256, (1, 1), padding='same', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(myModelConfig.weight_decay))(score_map_s1s2)
        score_map_s1 = BatchNormalization(axis=bn_axis, name=map_name + 'bn_s1_3')(score_map_s1)
        score_map_s1 = Activation('relu')(score_map_s1)
        score_map_s2 = Conv2D(256, (3, 3), padding='same', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(myModelConfig.weight_decay))(score_map_s1s2)
        score_map_s2 = BatchNormalization(axis=bn_axis, name=map_name + 'bn_s2_3')(score_map_s2)
        score_map_s2 = Activation('relu')(score_map_s2)
        score_map_s1s2 = Add()([score_map_s1, score_map_s2])

        # Repetition 4.
        score_map_s1 = Conv2D(256, (1, 1), padding='same', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(myModelConfig.weight_decay))(score_map_s1s2)
        score_map_s1 = BatchNormalization(axis=bn_axis, name=map_name + 'bn_s1_4')(score_map_s1)
        score_map_s1 = Activation('relu')(score_map_s1)
        score_map_s2 = Conv2D(256, (3, 3), padding='same', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(myModelConfig.weight_decay))(score_map_s1s2)
        score_map_s2 = BatchNormalization(axis=bn_axis, name=map_name + 'bn_s2_4')(score_map_s2)
        score_map_s2 = Activation('relu')(score_map_s2)
        score_map_s1s2 = Add()([score_map_s1, score_map_s2])

        # Final 5-channel score map.
        score_map = Conv2D(5, (1, 1), padding='same', activation="relu", kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(myModelConfig.weight_decay))(score_map_s1s2)

        # locate head
        locate_head = Conv2D(64, (1, 1), padding='same', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(myModelConfig.weight_decay))(x)
        locate_head = BatchNormalization(axis=bn_axis, name=map_name + 'bn_l1_1')(locate_head)
        locate_head = Activation('relu')(locate_head)
        locate_head = Conv2D(64, (3, 3), padding='same', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(myModelConfig.weight_decay))(locate_head)
        locate_head = BatchNormalization(axis=bn_axis, name=map_name + 'bn_l1_2')(locate_head)
        locate_head = Activation('relu')(locate_head)
        locate_head = Conv2D(32, (3, 3), padding='same', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(myModelConfig.weight_decay))(locate_head)
        locate_head = BatchNormalization(axis=bn_axis, name=map_name + 'bn_l1_3')(locate_head)
        locate_head = Activation('relu')(locate_head)
        # 1-channel sigmoid mask in [0, 1].
        locate_map = Conv2D(1, (1, 1), padding='same', activation="sigmoid", kernel_initializer='he_normal', name=map_name + "_locate", kernel_regularizer=regularizers.l2(myModelConfig.weight_decay))(locate_head)

        # Gate the score map with the locate map (broadcast over channels).
        refined_map = Multiply(name=map_name + "refine_score_map")([score_map, locate_map])
        return refined_map, locate_map
def create_model(self):
    """Build the text-conditioned GAN: generator, capsule discriminator,
    and the combined adversarial model (stored on self.generator,
    self.discriminator, self.model)."""
    # The generator starts from a 4x-downscaled spatial grid and upsamples
    # back to full resolution via two stride-2 transposed convolutions.
    init_img_width = self.img_width // 4
    init_img_height = self.img_height // 4

    #GENERATOR ARCHITECTURE
    random_input = Input(shape=(self.random_input_dim,))
    text_input1 = Input(shape=(self.text_input_dim,))
    random_dense = Dense(self.random_input_dim)(random_input)
    text_layer1 = Dense(1024)(text_input1)
    merged = concatenate([random_dense, text_layer1])  # noise + text conditioning
    generator_layer = Dense(128 * init_img_width * init_img_height, activation='relu')(merged)
    generator_layer = BatchNormalization(momentum=0.9)(generator_layer)
    generator_layer = LeakyReLU(alpha=0.1)(generator_layer)
    generator_layer = Reshape((init_img_width, init_img_height, 128))(generator_layer)
    generator_layer = Conv2D(128, kernel_size=4, strides=1, padding='same')(generator_layer)
    generator_layer = BatchNormalization(momentum=0.9)(generator_layer)
    generator_layer = LeakyReLU(alpha=0.1)(generator_layer)
    # First 2x upsampling.
    generator_layer = Conv2DTranspose(128, kernel_size=4, strides=2, padding='same')(generator_layer)
    generator_layer = BatchNormalization(momentum=0.9)(generator_layer)
    generator_layer = LeakyReLU(alpha=0.1)(generator_layer)
    generator_layer = Conv2D(128, kernel_size=5, strides=1, padding='same')(generator_layer)
    generator_layer = BatchNormalization(momentum=0.9)(generator_layer)
    generator_layer = LeakyReLU(alpha=0.1)(generator_layer)
    # Second 2x upsampling -> full resolution.
    generator_layer = Conv2DTranspose(128, kernel_size=4, strides=2, padding='same')(generator_layer)
    generator_layer = BatchNormalization(momentum=0.9)(generator_layer)
    generator_layer = LeakyReLU(alpha=0.1)(generator_layer)
    generator_layer = Conv2D(128, kernel_size=5, strides=1, padding='same')(generator_layer)
    generator_layer = BatchNormalization(momentum=0.9)(generator_layer)
    generator_layer = LeakyReLU(alpha=0.1)(generator_layer)
    generator_layer = Conv2D(128, kernel_size=5, strides=1, padding='same')(generator_layer)
    generator_layer = BatchNormalization(momentum=0.9)(generator_layer)
    generator_layer = LeakyReLU(alpha=0.1)(generator_layer)
    # 3-channel output in [-1, 1] (tanh) — images are expected normalized accordingly.
    generator_layer = Conv2D(3, kernel_size=5, strides=1, padding='same')(generator_layer)
    generator_output = Activation('tanh')(generator_layer)

    self.generator = Model([random_input, text_input1], generator_output)
    print('\nGENERATOR:\n')
    print('generator: ', self.generator.summary())
    print("\n\n")
    self.generator.compile(loss='binary_crossentropy', optimizer=Adam(0.0002, 0.5), metrics=["accuracy"])
    plot_model(self.generator, to_file="generator_model.jpg", show_shapes=True)

    #DISCRIMINATOR MODEL
    text_input2 = Input(shape=(self.text_input_dim,))
    text_layer2 = Dense(1024)(text_input2)
    img_input2 = Input(shape=(self.img_width, self.img_height, self.img_channels))
    # first typical convlayer outputs a 20x20x256 matrix
    img_layer2 = Conv2D(filters=256, kernel_size=8, strides=1, padding='valid', name='conv1')(img_input2)  #kernel_size=9
    img_layer2 = LeakyReLU()(img_layer2)
    # original 'Dynamic Routing Between Capsules' paper does not include the batch norm layer after the first conv group
    img_layer2 = BatchNormalization(momentum=0.8)(img_layer2)
    img_layer2 = Conv2D(filters=256, kernel_size=8, strides=2, padding='valid', name='conv2')(img_layer2)  #kernel_size=9
    img_layer2 = LeakyReLU()(img_layer2)
    img_layer2 = BatchNormalization(momentum=0.8)(img_layer2)

    #NOTE: Capsule architecture starts from here.
    # primarycaps coming first
    # filters 512 (n_vectors=8 * channels=64)
    img_layer2 = Conv2D(filters=8 * 64, kernel_size=8, strides=2, padding='valid', name='primarycap_conv2_1')(img_layer2)
    #img_layer2 = Conv2D(filters=8 * 64, kernel_size=8, strides=2, padding='valid', name='primarycap_conv2_2')(img_layer2)
    # reshape into the 8D vector for all 32 feature maps combined
    # (primary capsule has collections of activations which denote orientation of the digit
    # while intensity of the vector which denotes the presence of the digit)
    img_layer2 = Reshape(target_shape=[-1, 8], name='primarycap_reshape')(img_layer2)
    # the purpose is to output a number between 0 and 1 for each capsule where the length of the input decides the amount
    img_layer2 = Lambda(squash, name='primarycap_squash')(img_layer2)
    img_layer2 = BatchNormalization(momentum=0.8)(img_layer2)

    # digitcaps are here
    img_layer2 = Flatten()(img_layer2)
    # capsule (i) in a lower-level layer needs to decide how to send its output vector to higher-level capsules (j)
    # it makes this decision by changing scalar weight (c=coupling coefficient) that will multiply its output vector and then be treated as input to a higher-level capsule
    # uhat = prediction vector, w = weight matrix but will act as a dense layer, u = output from a previous layer
    # uhat = u * w
    # neurons 1632 (num_capsules=102 * num_vectors=16)
    uhat = Dense(1632, kernel_initializer='he_normal', bias_initializer='zeros', name='uhat_digitcaps')(img_layer2)

    # Routing iteration 1: c = softmax over (a dense projection of) uhat.
    c = Activation('softmax', name='softmax_digitcaps1')(uhat)
    c = Dense(1632)(c)  # learned stand-in for the routing update
    x = Multiply()([uhat, c])
    s_j = LeakyReLU()(x)
    # we will repeat the routing part 2 more times (num_routing=3) to unfold the loop
    c = Activation('softmax', name='softmax_digitcaps2')(s_j)
    c = Dense(1632)(c)
    x = Multiply()([uhat, c])
    s_j = LeakyReLU()(x)
    c = Activation('softmax', name='softmax_digitcaps3')(s_j)
    c = Dense(1632)(c)
    x = Multiply()([uhat, c])
    s_j = LeakyReLU()(x)

    # Fuse capsule features with text conditioning, then binary real/fake score.
    merged = concatenate([s_j, text_layer2])
    discriminator_layer = Activation('relu')(merged)
    pred = Dense(1, activation='sigmoid')(discriminator_layer)
    self.discriminator = Model([img_input2, text_input2], pred)
    print('\nDISCRIMINATOR:\n')
    print('discriminator: ', self.discriminator.summary())
    print("\n\n")
    self.discriminator.compile(loss='binary_crossentropy', optimizer=Adam(0.0002, 0.5), metrics=['accuracy'])
    plot_model(self.discriminator, to_file="discriminator_model.jpg", show_shapes=True)

    #ADVERSARIAL MODEL
    model_output = self.discriminator([self.generator.output, text_input1])
    self.model = Model([random_input, text_input1], model_output)
    # Freeze the discriminator inside the combined model; the flag is captured
    # at compile time, so setting it here (before self.model.compile) is enough.
    self.discriminator.trainable = False
    print('\nADVERSARIAL MODEL:\n')
    print('generator-discriminator:\n', self.model.summary())
    print("\n\n")
    self.model.compile(loss='binary_crossentropy', optimizer=Adam(0.0002, 0.5), metrics=["accuracy"])
    plot_model(self.model, to_file="model.jpg", show_shapes=True)
def build(self):
    """Build the pronoun coreference scoring model.

    Inputs are embeddings for candidates A, B and pronoun P plus distance
    (and optionally linguistic) features; output is a softmax over
    [A-score, B-score, Neither] where Neither is fixed at logit 0.
    Uses module-level globals `use_lingui_features`, `path`, `basename`.
    """
    A, B, P = Input((self.word_input_shape, )), Input((self.word_input_shape, )), Input((self.word_input_shape, ))
    inputs = [A, B, P]
    if use_lingui_features:
        # Column count of the linguistic-feature CSV decides how many extra inputs.
        num_lingui_features = pd.read_csv(path + 'output/' + basename + '_lingui_df.csv').shape[1]
        dist_inputs = [Input((1, )) for i in range(num_lingui_features + 2)]
    else:
        dist1, dist2 = Input((self.dist_shape, )), Input((self.dist_shape, ))
        dist_inputs = [dist1, dist2]

    self.dist_embed = Embedding(10, self.embed_dim)
    # Shared scoring FFNN applied to both (P, A) and (P, B) pairs.
    self.ffnn = Sequential([
        Dense(self.hidden_dim, use_bias=True),
        Activation('relu'),
        Dropout(rate=0.2, seed=7),
        Dense(1, activation='linear')
    ])

    # Only the first two inputs are bucketed distances to embed.
    dist_embeds = [self.dist_embed(dist) for dist in dist_inputs[:2]]
    dist_embeds = [Flatten()(dist_embed) for dist_embed in dist_embeds]

    #Scoring layer
    #In https://www.aclweb.org/anthology/D17-1018,
    #used feed forward network which measures if it is an entity mention using a score
    #because we already know the word is mention.
    #In here, I just focus on the pairwise score
    PA = Multiply()([inputs[0], inputs[2]])  # element-wise P * A
    PB = Multiply()([inputs[1], inputs[2]])  # element-wise P * B
    #PairScore: sa(i,j) =wa·FFNNa([gi,gj,gi◦gj,φ(i,j)])
    # gi is embedding of Pronoun
    # gj is embedding of A or B
    # gi◦gj is element-wise multiplication
    # φ(i,j) is the distance embedding
    if use_lingui_features:
        # Indices 2-6 are A's linguistic features, 7-11 are B's.
        PA = Concatenate(axis=-1)([P, A, PA, dist_embeds[0]] + [dist_inputs[i] for i in [2, 3, 4, 5, 6]])
        PB = Concatenate(axis=-1)([P, B, PB, dist_embeds[1]] + [dist_inputs[i] for i in [7, 8, 9, 10, 11]])
    else:
        PA = Concatenate(axis=-1)([P, A, PA, dist_embeds[0]])
        PB = Concatenate(axis=-1)([P, B, PB, dist_embeds[1]])
    PA_score = self.ffnn(PA)
    PB_score = self.ffnn(PB)
    # Fix the Neither to score 0.
    score_e = Lambda(lambda x: K.zeros_like(x))(PB_score)
    #Final Output
    output = Concatenate(axis=-1)([PA_score, PB_score, score_e])  # [Pronoun and A score, Pronoun and B score, Neither Score]
    output = Activation('softmax')(output)
    model = Model(inputs + dist_inputs, output)
    return model
def __init__(self, dim, batch_norm, dropout, rec_dropout, task,
             target_repl=False, deep_supervision=False, num_classes=1,
             depth=1, input_dim=76, **kwargs):
    """Build a hierarchical GRU network for clinical time-series tasks.

    Args:
        dim: hidden size of the GRU layers.
        batch_norm, dropout, rec_dropout: regularization settings.
        task: one of 'decomp'/'ihm'/'ph' (sigmoid output) or 'los'
            (relu if num_classes == 1, else softmax).
        target_repl, deep_supervision: whether the output GRUs return
            full sequences / a mask input M is added.
        num_classes: output classes for 'los'.
        depth: number of stacked GRU layers before the output module.
        input_dim: number of input features per timestep.

    Raises:
        ValueError: if `task` is not one of the supported names.
    """
    # Python 3 print call (the original used a Python 2 print statement,
    # which is a SyntaxError on Python 3 and inconsistent with the rest
    # of this file).
    print("==> not used params in network class:", kwargs.keys())
    self.dim = dim
    self.batch_norm = batch_norm
    self.dropout = dropout
    self.rec_dropout = rec_dropout
    self.depth = depth

    if task in ['decomp', 'ihm', 'ph']:
        final_activation = 'sigmoid'
    elif task in ['los']:
        if num_classes == 1:
            final_activation = 'relu'
        else:
            final_activation = 'softmax'
    else:
        # BUG FIX: the original `return`ed the exception object instead of
        # raising it, so a wrong task silently continued with
        # `final_activation` undefined.
        raise ValueError("Wrong value for task")

    # Input layers and masking
    X = Input(shape=(None, input_dim), name='X')
    inputs = [X]
    mX = Masking()(X)

    if deep_supervision:
        M = Input(shape=(None,), name='M')
        inputs.append(M)

    # Configurations
    is_bidirectional = True
    if deep_supervision:
        is_bidirectional = False

    # Main part of the network: (depth - 1) stacked GRU layers.
    for i in range(depth - 1):
        num_units = dim
        if is_bidirectional:
            # Halve per-direction units so the concatenated output stays `dim`.
            num_units = num_units // 2
        gru = GRU(units=num_units,
                  activation='tanh',
                  return_sequences=True,
                  recurrent_dropout=rec_dropout,
                  dropout=dropout)
        if is_bidirectional:
            mX = Bidirectional(gru)(mX)
        else:
            mX = gru(mX)

    # Output module of the network
    return_sequences = (target_repl or deep_supervision)
    # Level-1 GRU shared by all superclass heads.
    L_lv1 = GRU(units=dim, activation='tanh', return_sequences=True,
                dropout=dropout, recurrent_dropout=rec_dropout)(mX)
    L = L_lv1
    if dropout > 0:
        L = Dropout(dropout)(L)

    # Hierarchical label structure; only 2 levels are supported.
    label_struct = utils.read_hierarchical_labels(
        '../../data/phenotyping/label_list.txt',
        '../../data/phenotyping/label_struct.json')

    y_lv1 = {}
    y_lv2 = {}
    for class_lv1 in label_struct.keys():
        # Superclass prediction from the last timestep of L.
        y_lv1[class_lv1] = Dense(1, activation=final_activation)(
            Lambda(lambda x: x[:, -1, :])(L))
        # Per-superclass level-2 branch on top of the shared L_lv1.
        L_lv2_gru = GRU(units=dim, activation='tanh',
                        return_sequences=return_sequences,
                        dropout=dropout,
                        recurrent_dropout=rec_dropout)(L_lv1)
        if dropout > 0:
            L_lv2_gru = Dropout(dropout)(L_lv2_gru)
        y_lv2[class_lv1] = {}
        for class_lv2 in label_struct[class_lv1]:
            y_lv2[class_lv1][class_lv2] = Dense(1, activation=final_activation)(L_lv2_gru)

    label_mapper = {}
    for super_label in label_struct.keys():
        label_mapper[super_label] = set(label_struct[super_label])

    # NOTE(review): superclass ids 25/26 and the 25 leaf labels are
    # hard-coded here — assumes label_struct has exactly keys 25 and 26
    # covering labels 0..24; confirm against label_struct.json.
    y_final = []
    for i in range(25):
        if (i in label_mapper[25]) and (i not in label_mapper[26]):
            y_final.append(Multiply()([y_lv1[25], y_lv2[25][i]]))
        elif (i not in label_mapper[25]) and (i in label_mapper[26]):
            y_final.append(Multiply()([y_lv1[26], y_lv2[26][i]]))
        elif (i in label_mapper[25]) and (i in label_mapper[26]):
            # Label under both superclasses: sum the two gated predictions.
            y_final.append(Add()([Multiply()([y_lv1[25], y_lv2[25][i]]),
                                  Multiply()([y_lv1[26], y_lv2[26][i]])]))
    # Also expose the two superclass predictions themselves.
    y_final.append(y_lv1[25])
    y_final.append(y_lv1[26])
    y = Concatenate()(y_final)
    outputs = [y]

    return super(Network, self).__init__(inputs=inputs, outputs=outputs)
# Pool each backbone's feature map into a (max || avg) vector.
x1 = Concatenate(axis=-1)([GlobalMaxPool2D()(x1), GlobalAvgPool2D()(x1)])
x2 = Concatenate(axis=-1)([GlobalMaxPool2D()(x2), GlobalAvgPool2D()(x2)])
# the below 4 lambda functions calculate the element-wise square of each input tensor
lambda_1 = Lambda(lambda tensor: K.square(tensor))(fn_1)
lambda_2 = Lambda(lambda tensor: K.square(tensor))(fn_2)
lambda_3 = Lambda(lambda tensor: K.square(tensor))(vgg_1)
lambda_4 = Lambda(lambda tensor: K.square(tensor))(vgg_2)
added_facenet = Add()([x1, x2])  # sum of the two pooled FaceNet feature vectors
added_vgg = Add()([vgg_1, vgg_2])  # sum of the two VGG feature maps
subtract_fn = Subtract()([x1, x2])  # x1 - x2 (FaceNet features)
subtract_vgg = Subtract()([vgg_1, vgg_2])  # vgg_1 - vgg_2 (VGG features)
subtract_fn2 = Subtract()([x2, x1])  # x2 - x1 (FaceNet features)
subtract_vgg2 = Subtract()([vgg_2, vgg_1])  # vgg_2 - vgg_1 (VGG features)
prduct_fn1 = Multiply()([x1, x2])  # element-wise product of the FaceNet features
prduct_vgg1 = Multiply()([vgg_1, vgg_2])  # element-wise product of the VGG features
sqrt_fn1 = Add()([lambda_1, lambda_2])  # fn_1^2 + fn_2^2 (sum of squared FaceNet features)
sqrt_vgg1 = Add()([lambda_3, lambda_4])  # vgg_1^2 + vgg_2^2 (sum of squared VGG features)
# Signed square root of the element-wise product (sign(x) * sqrt(|x| + eps));
# the epsilon keeps the gradient finite at zero.
sqrt_fn2 = Lambda(lambda tensor: K.sign(tensor) * K.sqrt(K.abs(tensor) + 1e-9))(prduct_fn1)
sqrt_vgg2 = Lambda(lambda tensor: K.sign(tensor) * K.sqrt(K.abs(tensor) + 1e-9))(prduct_vgg1)
# 1x1 convolutions project the (spatial) VGG combination maps down to 128 channels.
added_vgg = Conv2D(128, [1, 1])(added_vgg)
subtract_vgg = Conv2D(128, [1, 1])(subtract_vgg)
subtract_vgg2 = Conv2D(128, [1, 1])(subtract_vgg2)
prduct_vgg1 = Conv2D(128, [1, 1])(prduct_vgg1)
sqrt_vgg1 = Conv2D(128, [1, 1])(sqrt_vgg1)
sqrt_vgg2 = Conv2D(128, [1, 1])(sqrt_vgg2)
# finally concatenating all the above features for the final layer which feeds the dense layers.
def build_nn(num_antenna=64):
    """Build a position-estimation CNN over CSI data.

    Input: (num_antenna, num_sub, 2) real/imag channel matrix (num_sub is a
    module-level global). The network derives polar (amplitude/phase) and
    time-domain (IFFT) representations, stacks them with the raw input into
    6 "complex channels", runs Conv3D blocks with dense-style concatenations,
    and regresses a 2-D output ('linear'). Returns a compiled 2-GPU model.
    """
    nn_input = Input((num_antenna, num_sub, 2))
    dropout_rate = 0.25
    num_complex_channels = 6  # raw(2) + polar(2) + time-domain(2)

    def k_mean(tensor):
        # Mean over the subcarrier axis (axis=2).
        return K.mean(tensor, axis=2)

    # NOTE(review): mean_input is only printed, never wired into the graph.
    mean_input = Lambda(k_mean)(nn_input)
    print(mean_input.get_shape())

    # complex to polar
    real = Lambda(lambda x: x[:, :, :, 0])(nn_input)
    imag = Lambda(lambda x: x[:, :, :, 1])(nn_input)
    # complex_crop = Lambda(lambda x: x[:, :, 0, :], output_shape=(Nb_Antennas, 2, 1))(complex_input)
    # complex_input = Reshape((Nb_Antennas, 2, 1))(mean_input)
    real_squared = Multiply()([real, real])
    imag_squared = Multiply()([imag, imag])
    real_imag_squared_sum = Add()([real_squared, imag_squared])

    # amplitude: r = sqrt(re^2 + im^2)
    def k_sqrt(tensor):
        r = K.sqrt(tensor)
        return r

    r = Lambda(k_sqrt)(real_imag_squared_sum)
    r = Reshape((num_antenna, num_sub, 1))(r)
    print(r.get_shape())

    # phase: t = atan2(im, re)
    def k_atan(tensor):
        import tensorflow as tf
        t = tf.math.atan2(tensor[0], tensor[1])
        return t

    t = Lambda(k_atan)([imag, real])
    t = Reshape((num_antenna, num_sub, 1))(t)
    print(t.get_shape())

    def ifft(x):
        # Inverse FFT over the last (subcarrier) axis; re-split into re/im planes.
        y = tf.complex(x[:, :, :, 0], x[:, :, :, 1])
        ifft = tf.spectral.ifft(y)
        return tf.stack([tf.math.real(ifft), tf.math.imag(ifft)], axis=3)

    polar_input = Concatenate()([r, t])
    time_input = Lambda(ifft)(nn_input)
    total_input = Concatenate()([nn_input, polar_input, time_input])
    # print("total", total_input.get_shape())

    # reduce dimension of time axis
    lay_input = Reshape((num_antenna, num_sub, num_complex_channels, 1))(total_input)

    # Block D1: subcarrier-axis conv with stride 5 downsampling.
    layD1 = Conv3D(8, (1, 23, num_complex_channels), strides=(1, 5, 1), padding='same')(lay_input)
    layD1 = LeakyReLU(alpha=0.3)(layD1)
    layD1 = Dropout(dropout_rate)(layD1)
    # Block D2: conv + dense concat with D1, channel-mixing conv, strided conv.
    layD2 = Conv3D(8, (1, 23, 1), padding='same')(layD1)
    layD2 = LeakyReLU(alpha=0.3)(layD2)
    layD2 = Concatenate()([layD1, layD2])
    layD2 = Conv3D(8, (1, 1, num_complex_channels), padding='same')(layD2)
    layD2 = LeakyReLU(alpha=0.3)(layD2)
    layD2 = Conv3D(8, (1, 23, 1), strides=(1, 5, 1), padding='same', kernel_regularizer=regularizers.l2(0.01))(layD2)
    layD2 = LeakyReLU(alpha=0.3)(layD2)
    layD2 = Dropout(dropout_rate)(layD2)
    # Block D3: same pattern as D2.
    layD3 = Conv3D(8, (1, 23, 1), padding='same')(layD2)
    layD3 = LeakyReLU(alpha=0.3)(layD3)
    layD3 = Concatenate()([layD2, layD3])
    layD3 = Conv3D(8, (1, 1, num_complex_channels), padding='same')(layD3)
    layD3 = LeakyReLU(alpha=0.3)(layD3)
    layD3 = Conv3D(8, (1, 23, 1), strides=(1, 5, 1), padding='same', kernel_regularizer=regularizers.l2(0.01))(layD3)
    layD3 = LeakyReLU(alpha=0.3)(layD3)
    layD3 = Dropout(dropout_rate)(layD3)
    # Block D4: conv + concat + channel mix (no further striding).
    layD4 = Conv3D(8, (1, 23, 1), padding='same')(layD3)
    layD4 = LeakyReLU(alpha=0.3)(layD4)
    layD4 = Concatenate()([layD4, layD3])
    layD4 = Conv3D(8, (1, 1, num_complex_channels), padding='same')(layD4)
    layD4 = LeakyReLU(alpha=0.3)(layD4)

    # V blocks: antenna-axis (8, 1, 1) convs with dense concatenations.
    layV1 = Conv3D(8, (8, 1, 1), padding='same')(layD4)
    layV1 = LeakyReLU(alpha=0.3)(layV1)
    layV1 = Dropout(dropout_rate)(layV1)
    layV1 = Concatenate()([layV1, layD4])
    layV2 = Conv3D(8, (8, 1, 1), padding='same', kernel_regularizer=regularizers.l2(0.01))(layV1)
    layV2 = LeakyReLU(alpha=0.3)(layV2)
    layV2 = Dropout(dropout_rate)(layV2)
    layV2 = Concatenate()([layV2, layV1])
    layV3 = Conv3D(8, (8, 1, 1), padding='same')(layV2)
    layV3 = LeakyReLU(alpha=0.3)(layV3)
    layV3 = Dropout(dropout_rate)(layV3)
    layV3 = Concatenate()([layV3, layV2])
    layV4 = Conv3D(8, (8, 1, 1), padding='same')(layV3)
    layV4 = LeakyReLU(alpha=0.3)(layV4)
    layV4 = Dropout(dropout_rate)(layV4)
    layV4 = Concatenate()([layV4, layV3])
    layV5 = Conv3D(8, (8, 1, 1), padding='same')(layV4)
    layV5 = LeakyReLU(alpha=0.3)(layV5)
    layV5 = Dropout(dropout_rate)(layV5)

    # Regression head: 2-D linear output (position coordinates — TODO confirm).
    nn_output = Flatten()(layV5)
    nn_output = Dense(64, activation='relu')(nn_output)
    nn_output = Dense(32, activation='relu')(nn_output)
    nn_output = Dense(2, activation='linear')(nn_output)

    nn = Model(inputs=nn_input, outputs=nn_output)
    nn = multi_gpu_model(nn, gpus=2)  # requires 2 GPUs at build time
    nn.compile(optimizer='Adam', loss='mse', metrics=[dist])
    nn.summary()
    return nn
# Pad the validation split to the same fixed length as train/test.
x_val = sequence.pad_sequences(x_val, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
print('x_val shape:', x_val.shape)
# configuration matches 4.47 Million parameters with `units=600` and `64 embedding dim`
print('Build model...')
inputs = Input(shape=(maxlen, ))
embed = Embedding(embed_size + 1, 128, input_shape=(maxlen, ))(inputs)
groupNormal = GroupNormalization(groups=32, axis=-1)(embed)
# Per-position scalar attention score in (0, 1) from a sigmoid 1-filter conv.
x_score = Convolution1D(filters=1, kernel_size=3, padding='same', activation='sigmoid')(groupNormal)
# Scale the embeddings by their attention scores (broadcast over embedding dim).
x_atten = Multiply()([x_score, embed])
# Two stacked IndRNN layers; recurrent clipping disabled (-1).
first_ind = IndRNN(FLAGS.units, recurrent_clip_min=-1, recurrent_clip_max=-1, dropout=0.0, recurrent_dropout=0.0, return_sequences=True)(x_atten)
second_ind = IndRNN(FLAGS.units, recurrent_clip_min=-1, recurrent_clip_max=-1, dropout=0.0, recurrent_dropout=0.0, return_sequences=False)(first_ind)
fc = Dense(128, kernel_initializer='he_normal')(second_ind)
ac = Activation('relu')(fc)
raise RuntimeError('Unknown image_dim_ordering.') X = Conv2D(32, 8, strides=4, activation='relu')(X) X = Conv2D(64, 4, strides=2, activation='relu')(X) X = Conv2D(64, 3, strides=1, activation='relu')(X) X = MaxPool2D(2)(X) X = Flatten()(X) Features = Dense(3, activation='softmax')(X) Controller = Lambda(lambda x: K.concatenate([ K.reshape(K.zeros_like(x[:, 0]), (-1, 1)), # to make sure action 0 is dominated K.reshape(-x[:, 0] - x[:, 1] - 2 * x[:, 2], (-1, 1)), K.reshape(x[:, 1] + x[:, 2], (-1, 1)), K.reshape(x[:, 0] + x[:, 2], (-1, 1))]))(Features) Controller = Activation("softmax")(Controller) OutLayer = Dense(512, activation='relu')(X) OutLayer = Dense(4, activation="linear")(OutLayer) OutLayer = Multiply()([Controller, OutLayer]) model = Model(inputs=InpLayer, outputs=OutLayer) print(model.summary()) class DisplayFeatureLayer(Callback): """ Custom callback layer to get the output of PhyLayer """ def __init__(self, interval=10000): super(DisplayFeatureLayer, self).__init__() self.total_steps = 0 self.interval = interval def on_step_end(self, step, logs={}):
def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
    """SE-ResNet convolutional block with a projection shortcut.

    Applies a 1x1 -> kxk (strided) -> 1x1 bottleneck, gates the result with a
    squeeze-and-excitation branch (reduction 16), adds a strided 1x1
    projection of `input_tensor`, and finishes with ReLU.

    Args:
        input_tensor: 4-D input feature map.
        kernel_size: size of the middle convolution.
        filters: (filters1, filters2, filters3) bottleneck widths.
        stage, block: identifiers used to build unique layer names.
        strides: stride of the middle conv and the projection shortcut.

    Returns:
        The activated output tensor of the block.
    """
    filters1, filters2, filters3 = filters
    # BatchNorm normalizes over the channel axis, which depends on data format.
    bn_axis = 3 if K.image_data_format() == 'channels_last' else 1
    bn_eps = 0.0001

    block_name = f"{stage}_{block}"
    conv_name_base = f"conv{block_name}"
    relu_name_base = f"relu{block_name}"

    # Bottleneck: 1x1 reduce -> kxk (strided) -> 1x1 expand.
    out = Conv2D(filters1, (1, 1), use_bias=False,
                 name=f"{conv_name_base}_x1")(input_tensor)
    out = BatchNormalization(axis=bn_axis, epsilon=bn_eps,
                             name=f"{conv_name_base}_x1_bn")(out)
    out = Activation('relu', name=f"{relu_name_base}_x1")(out)

    out = Conv2D(filters2, kernel_size, strides=strides, padding='same',
                 use_bias=False, name=f"{conv_name_base}_x2")(out)
    out = BatchNormalization(axis=bn_axis, epsilon=bn_eps,
                             name=f"{conv_name_base}_x2_bn")(out)
    out = Activation('relu', name=f"{relu_name_base}_x2")(out)

    out = Conv2D(filters3, (1, 1), use_bias=False,
                 name=f"{conv_name_base}_x3")(out)
    out = BatchNormalization(axis=bn_axis, epsilon=bn_eps,
                             name=f"{conv_name_base}_x3_bn")(out)

    # Squeeze-and-excitation: global pool -> bottleneck FC pair -> channel gate.
    squeeze = GlobalAveragePooling2D(name=f"pool{block_name}_gap")(out)
    squeeze = Dense(filters3 // 16, activation='relu',
                    name=f"fc{block_name}_sqz")(squeeze)
    squeeze = Dense(filters3, activation='sigmoid',
                    name=f"fc{block_name}_exc")(squeeze)
    squeeze = Reshape([1, 1, filters3])(squeeze)
    out = Multiply(name=f"scale{block_name}")([out, squeeze])

    # Projection shortcut matches the output channels and spatial stride.
    residual = Conv2D(filters3, (1, 1), strides=strides, use_bias=False,
                      name=f"{conv_name_base}_prj")(input_tensor)
    residual = BatchNormalization(axis=bn_axis, epsilon=bn_eps,
                                  name=f"{conv_name_base}_prj_bn")(residual)

    out = layers.add([out, residual], name=f"block_{block_name}")
    return Activation('relu', name=relu_name_base)(out)
def L2X(train=True):
    """
    Generate scores on features on validation by L2X.
    Train the L2X model with variational approaches if train = True.

    Returns (train=True):  scores, x_val, T, scores_explain, x_explain
    Returns (train=False): T, scores_explain, x_explain
    Relies on module-level globals: args, maxlen, max_features,
    embedding_dims, hidden_dims, k, batch_size.
    """
    if train:
        # Pick the loader matching the attacked target model.
        load_data = load_data_bert if args.target_model == "bert" else load_data_CNN_LSTM
        if args.target_model == "wordCNN" or args.target_model == "wordLSTM":
            print(
                "WARNING: intended to attack CNN or LSTM models. Make sure that source/target_max_seq_length match target model training lengths"
            )
        x_train, y_train, x_val, y_val, id_to_word, x_explain = load_data()
        # Predicted (soft) labels of the target model; L2X is trained to
        # mimic these rather than the gold labels.
        pred_train = np.load(args.train_pred_labels, allow_pickle=True)
        pred_val = np.load(args.val_pred_labels, allow_pickle=True)
        # Truncate the data to the number of available predictions.
        # NOTE(review): x_val/y_val are sliced by pred_train.shape[0] rather
        # than pred_val.shape[0] — looks like a copy-paste slip; confirm
        # against the shapes of the saved prediction files.
        x_train, y_train, x_val, y_val = x_train[:pred_train.shape[
            0]], y_train[:pred_train.
                         shape[0]], x_val[:pred_train.
                                          shape[0]], y_val[:pred_train.
                                                           shape[0]]
    else:
        # Explain-only mode: currently only a BERT loader exists, so any
        # other target model yields None and fails on the call below.
        load_data_explain = load_data_bert_explain if args.target_model == "bert" else None
        id_to_word, x_explain = load_data_explain()

    # Selector network P(S|X): emits per-token logits, then draws a k-hot
    # relaxed subset via Gumbel-softmax (Sample_Concrete, temperature tau).
    with tf.variable_scope('selection_model'):
        X_ph = Input(shape=(maxlen, ), dtype='int32')
        logits_T = construct_gumbel_selector(X_ph, max_features,
                                             embedding_dims, maxlen)
        tau = 0.5
        T = Sample_Concrete(tau, k)(logits_T)

    # Predictor network q(X_S): classifies from the masked embeddings only.
    with tf.variable_scope('prediction_model'):
        emb2 = Embedding(max_features, embedding_dims,
                         input_length=maxlen)(X_ph)
        net = Mean(Multiply()([emb2, T]))
        net = Dense(hidden_dims)(net)
        net = Activation('relu')(net)
        # AG News has 4 classes; the other supported datasets are binary.
        preds = Dense((4 if args.dataset_name == "ag" else 2),
                      activation='softmax',
                      name='new_dense')(net)

    model = Model(inputs=X_ph, outputs=preds)
    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['acc'])

    if train:
        # Keep only the weights that maximize validation accuracy.
        filepath = os.path.join(args.outdir, "l2x.hdf5")
        checkpoint = ModelCheckpoint(filepath,
                                     monitor='val_acc',
                                     verbose=1,
                                     save_best_only=True,
                                     mode='max')
        callbacks_list = [checkpoint]
        st = time.time()
        model.fit(x_train,
                  pred_train,
                  validation_data=(x_val, pred_val),
                  callbacks=callbacks_list,
                  epochs=5,
                  batch_size=batch_size,
                  verbose=0)
        duration = time.time() - st
        print('Training time is {}'.format(duration))
        model.load_weights(os.path.join(args.outdir, 'l2x.hdf5'),
                           by_name=True)

    # Score model: maps inputs to the selector logits (importance scores).
    pred_model = Model(X_ph, logits_T)
    pred_model.compile(loss='categorical_crossentropy',
                       optimizer='adam',
                       metrics=['acc'])
    st = time.time()
    # Per-token importance for the explain set; channel 0 of the logits.
    scores_explain = pred_model.predict(x_explain,
                                        verbose=0,
                                        batch_size=batch_size)[:, :, 0]
    scores_explain = np.reshape(scores_explain,
                                [scores_explain.shape[0], maxlen])
    if train:
        scores = pred_model.predict(x_val, verbose=0,
                                    batch_size=batch_size)[:, :, 0]
        scores = np.reshape(scores, [scores.shape[0], maxlen])
        pred_explain = model.predict(x_explain, verbose=0, batch_size=1)
        pred_fname_target_attack = 'pred_explain' + '-L2X_target' + '_k_' + str(
            args.k_words) + '_' + args.target_model + '.npy'
        np.save(os.path.join(args.outdir, pred_fname_target_attack),
                pred_explain)
        return scores, x_val, T, scores_explain, x_explain
    else:
        pred_explain = model.predict(x_explain,
                                     verbose=0,
                                     batch_size=batch_size)
        pred_fname_synon = args.save_fname_explain_id + '_pred_synon' + '-L2X_target' + '_k_' + str(
            args.k_words) + '_' + args.target_model + '.npy'
        np.save(os.path.join(args.outdir, pred_fname_synon), pred_explain)
        return T, scores_explain, x_explain
def __call__(self):
    """Build and return the two-stream, three-stage age-regression model.

    Two parallel convolutional streams (relu/avg-pool vs tanh/max-pool)
    are fused at three resolutions; each stage predicts a coarse age bin
    distribution plus learned dynamic-range (delta) and within-bin (local)
    offsets, which `merge_age` combines into a single age in [0, 101].
    Reads self._input_shape, self._channel_axis, self.stage_num,
    self.lambda_local, self.lambda_d.
    """
    logging.debug("Creating model...")
    inputs = Input(shape=self._input_shape)
    # ---- stream x: Conv/BN/ReLU with average pooling; taps kept per stage ----
    x = Conv2D(32, (3, 3))(inputs)
    x = BatchNormalization(axis=self._channel_axis)(x)
    x = Activation('relu')(x)
    x_layer1 = AveragePooling2D(2, 2)(x)
    x = Conv2D(32, (3, 3))(x_layer1)
    x = BatchNormalization(axis=self._channel_axis)(x)
    x = Activation('relu')(x)
    x_layer2 = AveragePooling2D(2, 2)(x)
    x = Conv2D(32, (3, 3))(x_layer2)
    x = BatchNormalization(axis=self._channel_axis)(x)
    x = Activation('relu')(x)
    x_layer3 = AveragePooling2D(2, 2)(x)
    x = Conv2D(32, (3, 3))(x_layer3)
    x = BatchNormalization(axis=self._channel_axis)(x)
    x = Activation('relu')(x)
    # ---- stream s: Conv/BN/tanh with max pooling; taps kept per stage ----
    s = Conv2D(16, (3, 3))(inputs)
    s = BatchNormalization(axis=self._channel_axis)(s)
    s = Activation('tanh')(s)
    s_layer1 = MaxPooling2D(2, 2)(s)
    s = Conv2D(16, (3, 3))(s_layer1)
    s = BatchNormalization(axis=self._channel_axis)(s)
    s = Activation('tanh')(s)
    s_layer2 = MaxPooling2D(2, 2)(s)
    s = Conv2D(16, (3, 3))(s_layer2)
    s = BatchNormalization(axis=self._channel_axis)(s)
    s = Activation('tanh')(s)
    s_layer3 = MaxPooling2D(2, 2)(s)
    s = Conv2D(16, (3, 3))(s_layer3)
    s = BatchNormalization(axis=self._channel_axis)(s)
    s = Activation('tanh')(s)
    # ---- stage 1 head (deepest features) ----
    s_layer4 = Conv2D(10, (1, 1), activation='relu')(s)
    s_layer4 = Flatten()(s_layer4)
    s_layer4_mix = Dropout(0.2)(s_layer4)
    s_layer4_mix = Dense(units=self.stage_num[0],
                         activation="relu")(s_layer4_mix)
    x_layer4 = Conv2D(10, (1, 1), activation='relu')(x)
    x_layer4 = Flatten()(x_layer4)
    x_layer4_mix = Dropout(0.2)(x_layer4)
    x_layer4_mix = Dense(units=self.stage_num[0],
                         activation="relu")(x_layer4_mix)
    # Streams are fused by elementwise product; the raw fusion feeds the
    # scalar delta (bin-width modifier), the mixed fusion feeds the bin
    # probabilities and the per-bin local shifts.
    feat_a_s1_pre = Multiply()([s_layer4, x_layer4])
    delta_s1 = Dense(1, activation='tanh', name='delta_s1')(feat_a_s1_pre)
    feat_a_s1 = Multiply()([s_layer4_mix, x_layer4_mix])
    feat_a_s1 = Dense(2 * self.stage_num[0], activation='relu')(feat_a_s1)
    pred_a_s1 = Dense(units=self.stage_num[0],
                      activation="relu",
                      name='pred_age_stage1')(feat_a_s1)
    local_s1 = Dense(units=self.stage_num[0],
                     activation='tanh',
                     name='local_delta_stage1')(feat_a_s1)
    # ---- stage 2 head (mid-resolution taps) ----
    s_layer2 = Conv2D(10, (1, 1), activation='relu')(s_layer2)
    s_layer2 = MaxPooling2D(4, 4)(s_layer2)
    s_layer2 = Flatten()(s_layer2)
    s_layer2_mix = Dropout(0.2)(s_layer2)
    s_layer2_mix = Dense(self.stage_num[1], activation='relu')(s_layer2_mix)
    x_layer2 = Conv2D(10, (1, 1), activation='relu')(x_layer2)
    x_layer2 = AveragePooling2D(4, 4)(x_layer2)
    x_layer2 = Flatten()(x_layer2)
    x_layer2_mix = Dropout(0.2)(x_layer2)
    x_layer2_mix = Dense(self.stage_num[1], activation='relu')(x_layer2_mix)
    feat_a_s2_pre = Multiply()([s_layer2, x_layer2])
    delta_s2 = Dense(1, activation='tanh', name='delta_s2')(feat_a_s2_pre)
    feat_a_s2 = Multiply()([s_layer2_mix, x_layer2_mix])
    feat_a_s2 = Dense(2 * self.stage_num[1], activation='relu')(feat_a_s2)
    pred_a_s2 = Dense(units=self.stage_num[1],
                      activation="relu",
                      name='pred_age_stage2')(feat_a_s2)
    local_s2 = Dense(units=self.stage_num[1],
                     activation='tanh',
                     name='local_delta_stage2')(feat_a_s2)
    # ---- stage 3 head (earliest taps) ----
    s_layer1 = Conv2D(10, (1, 1), activation='relu')(s_layer1)
    s_layer1 = MaxPooling2D(8, 8)(s_layer1)
    s_layer1 = Flatten()(s_layer1)
    s_layer1_mix = Dropout(0.2)(s_layer1)
    s_layer1_mix = Dense(self.stage_num[2], activation='relu')(s_layer1_mix)
    x_layer1 = Conv2D(10, (1, 1), activation='relu')(x_layer1)
    x_layer1 = AveragePooling2D(8, 8)(x_layer1)
    x_layer1 = Flatten()(x_layer1)
    x_layer1_mix = Dropout(0.2)(x_layer1)
    x_layer1_mix = Dense(self.stage_num[2], activation='relu')(x_layer1_mix)
    feat_a_s3_pre = Multiply()([s_layer1, x_layer1])
    delta_s3 = Dense(1, activation='tanh', name='delta_s3')(feat_a_s3_pre)
    feat_a_s3 = Multiply()([s_layer1_mix, x_layer1_mix])
    feat_a_s3 = Dense(2 * self.stage_num[2], activation='relu')(feat_a_s3)
    pred_a_s3 = Dense(units=self.stage_num[2],
                      activation="relu",
                      name='pred_age_stage3')(feat_a_s3)
    local_s3 = Dense(units=self.stage_num[2],
                     activation='tanh',
                     name='local_delta_stage3')(feat_a_s3)

    def merge_age(x, s1, s2, s3, lambda_local, lambda_d):
        """Fold the three stages' bin distributions, deltas and local
        shifts into one expected age, scaled to V = 101 years.

        x = [pred_s1, pred_s2, pred_s3, delta_s1, delta_s2, delta_s3,
             local_s1, local_s2, local_s3].
        """
        # Zero tensors with the batch shape of the predictions.
        a = x[0][:, 0] * 0
        b = x[0][:, 0] * 0
        c = x[0][:, 0] * 0
        A = s1 * s2 * s3  # NOTE(review): computed but never used
        V = 101
        # Stage 1: expected bin index with local (within-bin) refinement.
        for i in range(0, s1):
            a = a + (i + lambda_local * x[6][:, i]) * x[0][:, i]
        a = K.expand_dims(a, -1)
        # Dynamic range: delta rescales the effective bin width.
        a = a / (s1 * (1 + lambda_d * x[3]))
        for j in range(0, s2):
            b = b + (j + lambda_local * x[7][:, j]) * x[1][:, j]
        b = K.expand_dims(b, -1)
        b = b / (s1 * (1 + lambda_d * x[3])) / (s2 * (1 + lambda_d * x[4]))
        for k in range(0, s3):
            c = c + (k + lambda_local * x[8][:, k]) * x[2][:, k]
        c = K.expand_dims(c, -1)
        c = c / (s1 * (1 + lambda_d * x[3])) / (
            s2 * (1 + lambda_d * x[4])) / (s3 * (1 + lambda_d * x[5]))
        age = (a + b + c) * V
        return age

    pred_a = Lambda(merge_age,
                    arguments={
                        's1': self.stage_num[0],
                        's2': self.stage_num[1],
                        's3': self.stage_num[2],
                        'lambda_local': self.lambda_local,
                        'lambda_d': self.lambda_d
                    },
                    output_shape=(1, ),
                    name='pred_a')([
                        pred_a_s1, pred_a_s2, pred_a_s3, delta_s1, delta_s2,
                        delta_s3, local_s1, local_s2, local_s3
                    ])
    model = Model(inputs=inputs, outputs=pred_a)
    return model
def get_model_0(num_users, num_items):
    """Build the global-coupling recommender.

    Two branches are fused for a sigmoid click/rating score:
    * attribute branch — 8-d user/item attribute embeddings coupled by a
      batched outer product, flattened and passed through a 16-unit layer;
    * id branch — 32-d user/item id embeddings combined elementwise and
      passed through a 32-unit layer.

    Args:
        num_users: highest user id (vocabulary is num_users + 1).
        num_items: highest item id (vocabulary is num_items + 1).

    Returns:
        An uncompiled keras Model over
        [user_attr_input, item_attr_input, user_id_input, item_id_input].
    """
    # +1 so the maximum id is a valid embedding index.
    n_users = num_users + 1
    n_items = num_items + 1

    # ---------------- attribute branch ----------------
    user_attr_input = Input(shape=(30, ), dtype='float32',
                            name='user_attr_input')
    u_attr = Dense(8, activation='relu')(user_attr_input)
    u_attr = Reshape((1, 8))(u_attr)

    item_attr_input = Input(shape=(18, ), dtype='float32',
                            name='item_attr_input')
    i_attr = Dense(8, activation='relu')(item_attr_input)
    i_attr = Reshape((8, 1))(i_attr)

    # (1, 8) x (8, 1) batched dot => full outer-product style coupling.
    attr_cross = Lambda(
        lambda t: K.batch_dot(t[0], t[1], axes=[1, 2]))([u_attr, i_attr])
    attr_flat = Flatten()(attr_cross)

    attr_hidden = Dense(16)(attr_flat)
    attr_hidden = Activation('relu')(attr_hidden)

    # ---------------- id branch ----------------
    user_id_input = Input(shape=(1, ), dtype='float32', name='user_id_input')
    user_embedding = Embedding(input_dim=n_users,
                               output_dim=32,
                               name='user_id_Embedding',
                               embeddings_initializer=RandomNormal(
                                   mean=0.0, stddev=0.01, seed=None),
                               embeddings_regularizer=l2(0),
                               input_length=1)
    user_vec = Flatten()(user_embedding(user_id_input))

    item_id_input = Input(shape=(1, ), dtype='float32', name='item_id_input')
    item_embedding = Embedding(input_dim=n_items,
                               output_dim=32,
                               name='item_id_Embedding',
                               embeddings_initializer=RandomNormal(
                                   mean=0.0, stddev=0.01, seed=None),
                               embeddings_regularizer=l2(0),
                               input_length=1)
    item_vec = Flatten()(item_embedding(item_id_input))

    id_cross = Multiply()([user_vec, item_vec])
    id_hidden = Dense(32)(id_cross)
    id_hidden = Activation('relu')(id_hidden)

    # ---------------- fusion and scoring ----------------
    joint = Concatenate()([attr_hidden, id_hidden])
    joint = Dense(64)(joint)
    joint = Activation('relu')(joint)

    score = Dense(1,
                  activation='sigmoid',
                  kernel_initializer='lecun_uniform',
                  name='topLayer')(joint)

    return Model(inputs=[
        user_attr_input, item_attr_input, user_id_input, item_id_input
    ],
                 outputs=score)
# Continuation of a two-input classifier build; conv1/input1/input2 are
# defined earlier in the file (outside this span).
conv1 = BatchNormalization()(conv1)
conv1 = Conv2D(256, (4, 4))(conv1)
# conv1 = BatchNormalization()(conv1)
conv1 = LeakyReLU()(conv1)
conv1 = Flatten()(conv1)
# Tile the flattened features 26 times, then flatten again so they can be
# matched elementwise against the repeated second input below.
conv1 = RepeatVector(26)(conv1)
conv1 = Flatten()(conv1)
# conv1 = Dropout(0.25)(conv1)
# conv1 = BatchNormalization()(conv1)
conv2 = RepeatVector(256)(input2)
conv2 = Flatten()(conv2)
# Elementwise gating of the conv features by the (repeated) second input.
outputs = Multiply()([conv1, conv2])
outputs = ReLU()(outputs)
# NOTE(review): the two `conv1 = Dropout(0.25)(conv1)` assignments below are
# dead — conv1 is never read after `outputs` is formed, so these dropout
# layers are not part of the model graph. Presumably they were meant to be
# applied to `outputs`; confirm intent before removing or moving them.
conv1 = Dropout(0.25)(conv1)
outputs = Dense(256, activation='relu')(outputs)
conv1 = Dropout(0.25)(conv1)
outputs = Dense(10, activation='softmax')(outputs)
model = Model(inputs=[input1, input2], outputs=outputs)
model.summary()
optimizers = Adam(epsilon=1e-08)
# optimizers = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
model.compile(optimizer=optimizers,
              loss='categorical_crossentropy',
              metrics=['acc'])
def nn_hits_model(hit_cutoff):
    """Build the session-sequence conversion model with time attention.

    Inputs (all length `cutoff` sequences): event ids, device ids, visit
    numbers, time-on-site, per-session hit matrices, and inter-session time
    gaps. Outputs a single sigmoid conversion probability.
    Reads module-level globals: cutoff, no_channels, no_devices, embed_dim,
    use_device_city, use_hits, use_tos, use_time, lstm_dim, dropout, learn.

    :param hit_cutoff: width of the per-session hit feature vector.
    :return: compiled keras Model (Nadam, binary cross-entropy).
    """
    # Input embeddings
    input_layer_e = Input(shape=(cutoff, ), name='event_input')
    embedding_event = Embedding(input_dim=no_channels,
                                output_dim=embed_dim,
                                input_length=cutoff,
                                name='sequence_embedding')(input_layer_e)
    input_layer_d = Input(shape=(cutoff, ), name='device_input')
    embedding_device = Embedding(input_dim=no_devices,
                                 output_dim=embed_dim,
                                 input_length=cutoff,
                                 name='device_embedding')(input_layer_d)
    input_layer_vn = Input(shape=(cutoff, ), name='visitno_input')
    visitno_transformed = Reshape([cutoff, 1])(input_layer_vn)
    input_layer_tos = Input(shape=(cutoff, ), name='tos_input')
    tos_transformed = Reshape([cutoff, 1])(input_layer_tos)
    # Convolution for web visit information
    hits_input_layer = Input(shape=(
        cutoff,
        hit_cutoff,
    ), name='hit_input')
    hit_convolution = Conv1D(filters=16,
                             kernel_size=64,
                             padding="same",
                             name='hit_conv')(hits_input_layer)
    # Assemble the per-step feature vector; optional parts are gated by
    # module-level flags, visit number is always appended last.
    emb_vector = [embedding_event]
    if use_device_city:
        emb_vector.append(embedding_device)
    if use_hits:
        emb_vector.append(hit_convolution)
    if use_tos:
        emb_vector.append(tos_transformed)
    emb_vector.append(visitno_transformed)
    input_embeddings = concatenate(emb_vector)
    # Time attention: sigmoid(phi(event) + mu(event) * time_gap) scales the
    # step features, so long gaps can dampen or boost each session.
    embedding_phi = Embedding(input_dim=no_channels,
                              output_dim=1,
                              input_length=cutoff,
                              name='phi_embedding')(input_layer_e)
    embedding_mu = Embedding(input_dim=no_channels,
                             output_dim=1,
                             input_length=cutoff,
                             name='mu_embedding')(input_layer_e)
    time_input_layer = Input(shape=(cutoff, 1), name='time_input')
    multiply = Multiply(name='multiplication')(
        [embedding_mu, time_input_layer])
    added = Add(name='addition')([embedding_phi, multiply])
    time_attention = Activation(activation='sigmoid',
                                name='attention_activation')(added)
    product = Multiply(name='embeddings_product')(
        [input_embeddings, time_attention])
    # Two stacked LSTMs; the use_time flag only toggles layer naming here.
    if use_time:
        lstm_1 = LSTM(lstm_dim, return_sequences=True, name='lstm1')(product)
        drop_1 = Dropout(rate=dropout, name='dropout1')(lstm_1)
        lstm_2 = LSTM(lstm_dim, return_sequences=True, name='lstm2')(drop_1)
    else:
        lstm_1 = LSTM(lstm_dim, return_sequences=True)(product)
        drop_1 = Dropout(rate=dropout)(lstm_1)
        lstm_2 = LSTM(lstm_dim, return_sequences=True)(drop_1)
    drop_2 = Dropout(rate=dropout, name='dropout2')(lstm_2)
    # Attention output layer
    # NOTE(review): softmax on a Dense(1) acts per time step over a single
    # unit, so every attention weight is exactly 1.0. A softmax across the
    # time axis was presumably intended — confirm before relying on this
    # layer doing anything.
    input_attention_layer = TimeDistributed(
        Dense(1), name='input_attention_layer')(drop_2)
    attention_output_layer = TimeDistributed(
        Dense(1, activation='softmax'),
        name='attention_output_layer')(input_attention_layer)
    attention_product = Multiply(name='attention_product')(
        [drop_2, attention_output_layer])
    flattened_output = Flatten()(attention_product)
    output_layer = Dense(1, activation='sigmoid',
                         name='final_output')(flattened_output)
    model = Model(inputs=[
        input_layer_e, input_layer_d, input_layer_vn, input_layer_tos,
        hits_input_layer, time_input_layer
    ],
                  outputs=output_layer)
    opt = Nadam(lr=learn)
    model.compile(optimizer=opt,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model
def malware_detection_model_4():
    """Build the gated-CNN + BiLSTM malware detector.

    Two gated convolution branches (kernel widths 2 and 3) over the
    (1000, 102) API-call sequence are concatenated, normalized, passed
    through a bidirectional LSTM with global max pooling, and classified
    into 2 classes with softmax.

    Returns:
        A compiled keras Model (Adam, sparse categorical cross-entropy).
    """
    seq_input = Input(shape=(1000, 102))

    normed = BatchNormalization()(seq_input)

    def _gated_conv(tensor, width):
        # Gated linear unit: a sigmoid gate elementwise-scales the ReLU path.
        gate = Conv1D(filters=128,
                      kernel_size=width,
                      strides=1,
                      kernel_regularizer=regularizers.l2(0.0005),
                      activation="sigmoid",
                      padding="same")(tensor)
        feat = Conv1D(filters=128,
                      kernel_size=width,
                      strides=1,
                      kernel_regularizer=regularizers.l2(0.0005),
                      activation="relu",
                      padding="same")(tensor)
        return Multiply()([gate, feat])

    # Two receptive-field widths, fused along the channel axis.
    fused = Concatenate()([_gated_conv(normed, 2), _gated_conv(normed, 3)])
    fused = BatchNormalization()(fused)

    # Sequence summary: BiLSTM over all steps, then max over time.
    fused = Bidirectional(LSTM(100, return_sequences=True))(fused)
    fused = GlobalMaxPooling1D()(fused)

    fused = Dense(64,
                  activation='relu',
                  kernel_regularizer=regularizers.l2(0.0005))(fused)
    fused = Dropout(0.5)(fused)
    fused = Dense(2, kernel_regularizer=regularizers.l2(0.0005))(fused)
    probs = Activation("softmax")(fused)

    detector = Model(inputs=seq_input, outputs=probs)
    optimizer = Adam(learning_rate=0.001, decay=1e-8)
    detector.compile(loss="sparse_categorical_crossentropy",
                     optimizer=optimizer,
                     metrics=["accuracy"])
    return detector
def ResPR(num_users, num_items, mf_dim=10, reg_mf=0, layers=None,
          reg_layers=None):
    """Build the ResPR pairwise-ranking model.

    Combines an MF branch (elementwise user*item products, with the negative
    item's preference negated) and a residual-MLP branch over concatenated
    embeddings, scoring the fused vector with a single sigmoid unit.

    :param num_users: user vocabulary size for the embeddings
    :param num_items: item vocabulary size for the embeddings
    :param mf_dim: latent dimensionality of the MF embeddings
    :param reg_mf: L2 strength applied to the MF embeddings
    :param layers: per-block MLP layer sizes; defaults to [[20, 20], [20, 10]]
    :param reg_layers: matching L2 strengths; defaults to [[0, 0], [0, 0]]
    :return: uncompiled keras Model over [user, pos_item, neg_item] inputs
    """
    # Fix: the original used mutable list literals as defaults, which are
    # shared across calls; None sentinels preserve the same values safely.
    if layers is None:
        layers = [[20, 20], [20, 10]]
    if reg_layers is None:
        reg_layers = [[0, 0], [0, 0]]
    assert len(layers) == len(reg_layers)

    user_input = Input(shape=(1, ), dtype='int32')
    item_input_pos = Input(shape=(1, ), dtype='int32')
    item_input_neg = Input(shape=(1, ), dtype='int32')

    MF_Embedding_User = Embedding(input_dim=num_users,
                                  output_dim=mf_dim,
                                  name='mf_embedding_user',
                                  embeddings_initializer=init_normal,
                                  embeddings_regularizer=l2(reg_mf),
                                  input_length=1)
    MF_Embedding_Item = Embedding(input_dim=num_items,
                                  output_dim=mf_dim,
                                  name='mf_embedding_item',
                                  embeddings_initializer=init_normal,
                                  embeddings_regularizer=l2(reg_mf),
                                  input_length=1)
    # The MLP branch's first layer expects user and item halves, hence /2.
    MLP_Embedding_User = Embedding(input_dim=num_users,
                                   output_dim=int(layers[0][0] / 2),
                                   name='mlp_embedding_user',
                                   embeddings_initializer=init_normal,
                                   embeddings_regularizer=l2(reg_layers[0][0]),
                                   input_length=1)
    MLP_Embedding_Item = Embedding(input_dim=num_items,
                                   output_dim=int(layers[0][0] / 2),
                                   name='mlp_embedding_item',
                                   embeddings_initializer=init_normal,
                                   embeddings_regularizer=l2(reg_layers[0][0]),
                                   input_length=1)

    # MF branch: positive preference kept as-is, negative preference negated
    # so the score rewards separating the pair.
    mf_user_latent = Flatten()(MF_Embedding_User(user_input))
    mf_item_latent_pos = Flatten()(MF_Embedding_Item(item_input_pos))
    mf_item_latent_neg = Flatten()(MF_Embedding_Item(item_input_neg))
    prefer_pos = Multiply()([mf_user_latent, mf_item_latent_pos])
    prefer_neg = Multiply()([mf_user_latent, mf_item_latent_neg])
    prefer_neg = Lambda(lambda x: -x)(prefer_neg)
    mf_vector = Concatenate()([prefer_pos, prefer_neg])

    # MLP branch: concatenate user / pos item / negated neg item embeddings.
    mlp_user_latent = Flatten()(MLP_Embedding_User(user_input))
    mlp_item_latent_pos = Flatten()(MLP_Embedding_Item(item_input_pos))
    mlp_item_latent_neg = Flatten()(MLP_Embedding_Item(item_input_neg))
    mlp_item_latent_neg = Lambda(lambda x: -x)(mlp_item_latent_neg)
    mlp_vector = Concatenate()(
        [mlp_user_latent, mlp_item_latent_pos, mlp_item_latent_neg])

    ## create first layer of MLP
    first_layer = Dense(layers[0][1],
                        kernel_regularizer=l2(reg_layers[0][1]),
                        activation='relu',
                        name='layer1')(mlp_vector)

    ## build model with shortcuts
    res_layers = layers[1:]
    res_reg = reg_layers[1:]
    mlp_vector = ResMLP(first_layer, res_layers, res_reg)

    predict_vector = Concatenate()([mf_vector, mlp_vector])

    # Final prediction layer
    prediction = Dense(1,
                       activation='sigmoid',
                       kernel_initializer='lecun_uniform',
                       name='prediction')(predict_vector)

    model = Model(inputs=[user_input, item_input_pos, item_input_neg],
                  outputs=prediction)
    return model
def afm(sparse_field_num,
        sparse_index_num,
        dense_field_num,
        embed_size=8,
        embeddings_initializer='uniform',
        embeddings_regularizer=None,
        attn_fact_num=8,
        kernel_initializer='glorot_uniform',
        kernel_regularizer=None,
        bias_initializer='zeros',
        bias_regularizer=None,
        output_use_bias=True,
        output_activation=None):
    """
    An implementation of the AFM (Attentional Factorization Machine) model
    for the CTR problem: a linear (LR) part plus pair-wise feature
    interactions pooled with learned attention weights.
    :param sparse_field_num: The number of sparse field
    :param sparse_index_num: The total number index used to encode sparse features in all sparse field
    :param dense_field_num: The number of dense field
    :param embed_size: The embedding size
    :param embeddings_initializer: The initializer used to initialize kernels in embedding layer
    :param embeddings_regularizer: The regularizer used in embedding layer
    :param attn_fact_num: The number of latent factories used in attention network part
    :param kernel_initializer: The initializer used to initialize kernel
    :param kernel_regularizer: The regularizer used on kernel
    :param bias_initializer: The initializer used to initialize bias
    :param bias_regularizer: The regularizer used on bias
    :param output_use_bias: In output layer, whether use bias or not.
    :param output_activation: The activation function used in output layer
    :return: A keras Model object
    """
    # 1. Inputs
    # =============================================================================================
    sparse_feat_index = Input(shape=(sparse_field_num, ),
                              name='sparse_feat_index')
    dense_feat_value = Input(shape=(dense_field_num, ),
                             name='dense_feat_value')

    # 2. LR part => (batch_size, 1)
    # =============================================================================================
    # 2.1 Sparse features linear sum part => (batch_size, 1)
    # (batch_size, sparse_field_num, 1)
    sparse_feat_weight = Embedding(
        input_dim=sparse_index_num,
        output_dim=1,
        embeddings_initializer=kernel_initializer,
        embeddings_regularizer=kernel_regularizer,
        name='sparse_feat_weight_layer')(sparse_feat_index)
    # (batch_size, 1)
    sparse_lr_out = sum_layer(axis=1,
                              name='sparse_lr_out_layer')(sparse_feat_weight)
    # 2.2 Dense features linear part => (batch_size, 1)
    dense_lr_out = Dense(units=1,
                         activation=None,
                         use_bias=False,
                         kernel_initializer=kernel_initializer,
                         kernel_regularizer=kernel_regularizer,
                         name='dense_lr_out_layer')(dense_feat_value)

    # 3. Embedding Layer => (batch_size, sparse_field_num + dense_field_num, embed_size)
    # =============================================================================================
    # 3.1 Sparse features embedding part => (batch_size, sparse_field_num, embed_size)
    embed_sparse_feat_index = Embedding(
        input_dim=sparse_index_num,
        output_dim=embed_size,
        embeddings_initializer=embeddings_initializer,
        embeddings_regularizer=embeddings_regularizer,
        name='embed_sparse_feat_index_layer')(sparse_feat_index)
    # 3.2 Dense features embedding part => (batch_size, dense_field_num, embed_size)
    embed_dense_feat_value = DenseEmbedding(
        embed_size=embed_size,
        embeddings_initializer=embeddings_initializer,
        embeddings_regularizer=embeddings_regularizer,
        name='embed_dense_feat_value_layer')(dense_feat_value)
    # 3.3 Concatenate embedded sparse index and embedded dense value
    embed_output = Concatenate(axis=1, name='embed_output_layer')(
        [embed_sparse_feat_index, embed_dense_feat_value])

    # 4. Pair-wise interaction => (batch_size, pairs, embed_size), pairs = (field_num * (field_num-1))/2
    # =============================================================================================
    pair_wise_feat = inner_product_layer(
        keepdims=True, name='inner_product_layer')(embed_output)

    # 5. Attention weight part => (batch_size, pairs, 1)
    # =============================================================================================
    # 5.1 One hidden layer scores each interaction pair => (batch_size, pairs, attn_fact_num)
    attn_z1 = Dense(units=attn_fact_num,
                    use_bias=True,
                    name='attn_dense_layer1',
                    kernel_initializer=kernel_initializer,
                    kernel_regularizer=kernel_regularizer,
                    bias_initializer=bias_initializer,
                    bias_regularizer=bias_regularizer)(pair_wise_feat)
    attn_a1 = Activation('relu', name='attn_activation_layer1')(attn_z1)
    # 5.2 Scalar score per pair, normalized across pairs => (batch_size, pairs, 1)
    attn_z2 = Dense(units=1,
                    use_bias=False,
                    name='attn_dense_layer2',
                    kernel_initializer=kernel_initializer,
                    kernel_regularizer=kernel_regularizer)(attn_a1)
    attn_weight = Softmax(axis=1, name='attn_output_layer')(attn_z2)

    # 6. Attention based pooling => (batch_size, 1)
    # =============================================================================================
    # Weight each pair-wise interaction => (batch_size, pairs, embed_size)
    pool_input = Multiply(name='pooling_input_layer')(
        [pair_wise_feat, attn_weight])
    # Sum over pairs => (batch_size, embed_size)
    pool_output = sum_layer(1, name='pooling_output_layer')(pool_input)
    # Project to a scalar => (batch_size, 1)
    afm_output = Dense(units=1,
                       use_bias=False,
                       name='afm_output_layer',
                       kernel_initializer=kernel_initializer,
                       kernel_regularizer=kernel_regularizer)(pool_output)

    # 7. Output layer => (batch_size, 1)
    # =============================================================================================
    outputs = OutputLayer(
        activation=output_activation,
        use_bias=output_use_bias,
        bias_initializer=bias_initializer,
        bias_regularizer=bias_regularizer,
        name='output_layer')([sparse_lr_out, dense_lr_out, afm_output])

    # 8. Build Model
    # =============================================================================================
    model = Model(inputs=[sparse_feat_index, dense_feat_value],
                  outputs=outputs)
    model.compile(optimizer='sgd', loss='binary_crossentropy',
                  metrics=['mse'])
    return model