def decomposable_attention(maxlen, max_features, projection_hidden=0,
                           projection_dropout=0.2, projection_dim=64,
                           compare_dim=128, compare_dropout=0.2,
                           dense_dim=64, dense_dropout=0.2):
    """Build a two-input attention model with a 2-way softmax output.

    Both inputs share one embedding and one stack of projection layers.
    The projected sequences are aligned against each other, compared with a
    shared stack of dense layers, pooled (average and max), concatenated and
    classified.

    :param maxlen: length of each input sequence.
    :param max_features: vocabulary size passed to the embedding layer.
    :param projection_hidden: width of an optional extra ReLU projection
        layer; 0 disables it.
    :param projection_dropout: dropout rate after each projection layer.
    :param projection_dim: width of the final (linear) projection layer.
    :param compare_dim: width of both comparison dense layers.
    :param compare_dropout: dropout rate after each comparison layer.
    :param dense_dim: width of the hidden classifier layer.
    :param dense_dropout: dropout rate after the hidden classifier layer.
    :return: the (uncompiled) Keras model.
    """
    # Alternative sizes noted by the original author:
    # projection_dim=300, compare_dim=500, dense_dim=300.
    inp1 = Input(shape=(maxlen,))
    inp2 = Input(shape=(maxlen,))
    emb = Embedding(max_features, 256)
    emb1 = emb(inp1)
    emb2 = emb(inp2)

    # Projection (layers are shared between the two inputs)
    projection_layers = []
    if projection_hidden > 0:
        projection_layers.extend([
            Dense(projection_hidden, activation='relu'),
            Dropout(rate=projection_dropout),
        ])
    projection_layers.extend([
        Dense(projection_dim, activation=None),
        Dropout(rate=projection_dropout),
    ])
    encoded1 = time_distributed(emb1, projection_layers)
    encoded2 = time_distributed(emb2, projection_layers)

    # Attention
    att1, att2 = soft_attention_alignment(encoded1, encoded2)

    # Compare: each side is combined with the other side's alignment.
    combine1 = Concatenate()([encoded1, att2, submult(encoded1, att2)])
    combine2 = Concatenate()([encoded2, att1, submult(encoded2, att1)])
    compare_layers = [
        Dense(compare_dim, activation='relu'),
        Dropout(compare_dropout),
        Dense(compare_dim, activation='relu'),
        Dropout(compare_dropout),
    ]
    compare1 = time_distributed(combine1, compare_layers)
    compare2 = time_distributed(combine2, compare_layers)

    # Aggregate
    agg1 = apply_multiple(compare1, [GlobalAvgPool1D(), GlobalMaxPool1D()])
    agg2 = apply_multiple(compare2, [GlobalAvgPool1D(), GlobalMaxPool1D()])

    # Merge (batch normalisation and a second dense layer were tried here
    # by the original author and left disabled).
    merge = Concatenate()([agg1, agg2])
    dense = Dense(dense_dim, activation='relu')(merge)
    dense = Dropout(dense_dropout)(dense)
    preds = Dense(2, activation='softmax')(dense)

    model = Model(inputs=[inp1, inp2], outputs=preds)
    # summary() prints by itself and returns None; wrapping it in print()
    # only emitted a stray "None" line.
    model.summary()
    return model
def _inference_composition_block(self, m_a, m_b):
    """Compose two sequences and classify the pair.

    Each input goes through its own bidirectional LSTM (300 units), is
    average- and max-pooled over axis 1, and the four pooled vectors are
    concatenated and passed through two tanh dense layers with dropout.

    :param m_a: sequence tensor for the first input.
    :param m_b: sequence tensor for the second input.
    :return: softmax output tensor with ``self._n_classes`` units.
    """
    y_a = Bidirectional(LSTM(300, return_sequences=True))(m_a)
    y_b = Bidirectional(LSTM(300, return_sequences=True))(m_b)

    # NOTE(review): the two mask-aware pooling layers below are defined but
    # the pooling further down uses the stock layers - confirm which was
    # intended.
    class GlobalAvgPool1DMasked(GlobalAvgPool1D):
        """Average pooling over axis 1 that ignores masked timesteps."""

        def __init__(self, **kwargs):
            super(GlobalAvgPool1DMasked, self).__init__(**kwargs)
            # Set after the base initialiser: Layer.__init__ assigns
            # supports_masking itself and would overwrite an earlier value.
            self.supports_masking = True

        def compute_mask(self, inputs, mask=None):
            # The pooled output has no time axis, so no mask is propagated.
            return None

        def call(self, inputs, mask=None):
            if mask is None:
                print('not mask average!')
                # The base class is the stock average pooling layer, so the
                # fallback really averages (Layer.call would not pool).
                return super(GlobalAvgPool1DMasked, self).call(inputs)
            mask = K.cast(mask, K.floatx())
            # Repeat the mask once per feature, then move the repeat axis last.
            mask = K.repeat(mask, inputs.shape[-1])
            mask = tf.transpose(mask, [0, 2, 1])
            inputs = inputs * mask
            # Guard against a zero count when every timestep is masked.
            counts = K.maximum(K.sum(mask, axis=1), K.epsilon())
            return K.sum(inputs, axis=1) / counts

        def compute_output_shape(self, input_shape):
            return (input_shape[0], input_shape[2])

    class GlobalMaxPool1DMasked(GlobalMaxPool1D):
        """Max pooling that accepts a mask but does not use it."""

        def __init__(self, **kwargs):
            super(GlobalMaxPool1DMasked, self).__init__(**kwargs)
            self.supports_masking = True

        def compute_mask(self, inputs, mask=None):
            return None

        def call(self, inputs, mask=None):
            return super(GlobalMaxPool1DMasked, self).call(inputs)

    max_pooling_a = GlobalMaxPool1D()(y_a)
    avg_pooling_a = GlobalAvgPool1D()(y_a)
    max_pooling_b = GlobalMaxPool1D()(y_b)
    avg_pooling_b = GlobalAvgPool1D()(y_b)

    y = Concatenate()(
        [avg_pooling_a, max_pooling_a, avg_pooling_b, max_pooling_b])
    y = Dense(1024, activation='tanh')(y)  # 1024 = number of units
    y = Dropout(0.5)(y)
    y = Dense(1024, activation='tanh')(y)  # 1024 units
    y = Dropout(0.5)(y)
    y = Dense(self._n_classes, activation='softmax')(y)
    return y
def get_model(self):
    """Build and compile the two-question matching model.

    Both questions share the embedding, the batch-normalisation layer, the
    encoding Bi-LSTM and the composition Bi-LSTM. All sizes come from
    ``self.configer``.

    :return: Keras model compiled with Adam (lr=1e-3) and binary
        cross-entropy, with a single sigmoid output.
    """
    cfg = self.configer
    q1 = Input(name='q1', shape=(cfg.maxlen,))
    q2 = Input(name='q2', shape=(cfg.maxlen,))

    # A single embedding is shared; a separate one for q2 was tried and
    # left disabled by the original author.
    embedding_op = Embedding(cfg.max_features, cfg.embedding_size,
                             input_length=cfg.maxlen)
    bn = BatchNormalization()

    # embedding + batch normalization
    # TODO: one shared normalisation layer or two?
    q1_embed = bn(embedding_op(q1))
    q2_embed = bn(embedding_op(q2))

    # bi-lstm
    encode = Bidirectional(LSTM(cfg.lstm_dim, return_sequences=True))
    q1_encoded = encode(q1_embed)
    q2_encoded = encode(q2_embed)

    # Attention
    q1_aligned, q2_aligned = self._soft_attention_alignment(
        q1_encoded, q2_encoded)

    # Compose
    q1_combined = Concatenate()(
        [q1_encoded, q2_aligned, self._submult(q1_encoded, q2_aligned)])
    q2_combined = Concatenate()(
        [q2_encoded, q1_aligned, self._submult(q2_encoded, q1_aligned)])
    # todo
    compose = Bidirectional(LSTM(cfg.lstm_dim, return_sequences=True))
    q1_compare = compose(q1_combined)
    q2_compare = compose(q2_combined)

    # Aggregate
    q1_rep = self._apply_multiple(
        q1_compare, [GlobalAvgPool1D(), GlobalMaxPool1D()])
    q2_rep = self._apply_multiple(
        q2_compare, [GlobalAvgPool1D(), GlobalMaxPool1D()])

    # Classifier: BN, then two (Dense -> BN -> Dropout) stages.
    hidden = BatchNormalization()(Concatenate()([q1_rep, q2_rep]))
    for width in (512, cfg.dense_dim):
        hidden = Dense(width, activation='relu')(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = Dropout(cfg.dropout_rate)(hidden)
    out_ = Dense(1, activation='sigmoid')(hidden)

    model = Model(inputs=[q1, q2], outputs=out_)
    model.compile(optimizer=Adam(lr=1e-3), loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model
def model(max_features, maxlen, attention=True):
    """Build and compile a bidirectional-GRU binary classifier.

    :param max_features: vocabulary size for the embedding layer.
    :param maxlen: length of the integer input sequence.
    :param attention: when true the GRU outputs are reduced with
        ``AttentionLayer``; otherwise with global average pooling.
    :return: model compiled with rmsprop and binary cross-entropy.
    """
    embedding_layer = Embedding(input_dim=max_features,
                                output_dim=128,
                                input_length=maxlen,
                                trainable=True)
    sequence_input = Input(shape=(maxlen,), dtype='int32')
    gru = Bidirectional(GRU(100, return_sequences=True))(
        embedding_layer(sequence_input))

    # Only the reduction differs between the two variants; the output head
    # is the same single sigmoid unit.
    if attention:
        reduced = AttentionLayer()(gru)
    else:
        reduced = GlobalAvgPool1D()(gru)
    preds = Dense(1, activation='sigmoid')(reduced)

    net = Model(sequence_input, preds)
    net.compile(loss='binary_crossentropy',
                optimizer='rmsprop',
                metrics=['acc'])
    net.summary()
    return net
def loc_model(fine_weights):
    """Build a model scoring one item against a list of items plus ``loc``.

    The item list and the single item share one embedding initialised from
    ``fine_weights``. The list is average-pooled and passed through a dense
    layer, dotted with the single item's embedding, concatenated with
    ``loc_input`` and mapped to one sigmoid unit.

    Reads the module-level names ``max_length``, ``n_items`` and
    ``vector_size``.

    :param fine_weights: initial weight matrix for the item embedding.
    :return: the (uncompiled) Keras model.
    """
    impressions_input = Input(shape=[max_length], name="impressions_input")
    loc_input = Input(shape=[1], name='loc_input')
    impression_input = Input(shape=[1], name="impression_input")

    # input_length is left unset so the layer accepts any sequence length.
    item_embedding = Embedding(input_dim=n_items + 1,
                               output_dim=vector_size,
                               weights=[fine_weights],
                               mask_zero=True,
                               name='item_embedding',
                               trainable=True)

    # List branch: embed -> average -> dense.
    list_vector = GlobalAvgPool1D(name='avg-embed')(
        item_embedding(impressions_input))
    list_vector = Dense(vector_size, activation='relu')(list_vector)

    # Single-item branch: embed, then drop the length-1 axis.
    item_vector = Lambda(lambda r: K.squeeze(r, 1))(
        item_embedding(impression_input))

    score = Dot(axes=1)([item_vector, list_vector])
    score = Concatenate()([score, loc_input])
    score = Dense(1, activation='sigmoid', name='prediction')(score)

    return Model([impressions_input, loc_input, impression_input], score)
def build_model():
    """Build a small 1-D CNN over inputs of shape (40, 1) with 2 outputs.

    An earlier two-layer LSTM variant (32 then 16 units, softmax over
    ``CLASSES``) was left disabled by the original author in favour of this
    network.

    :return: the (uncompiled) Sequential model; its summary is printed.
    """
    model = Sequential([
        Conv1D(filters=4, kernel_size=9, activation='relu',
               input_shape=(40, 1), kernel_regularizer=l2(0.025)),
        MaxPool1D(strides=4),
        BatchNormalization(),
        Conv1D(filters=64, kernel_size=4, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Conv1D(filters=32, kernel_size=1, activation='relu'),
        BatchNormalization(),
        Dropout(0.75),
        GlobalAvgPool1D(),
        Dense(2, activation='softmax'),
    ])
    model.summary()
    return model
def get_model():
    """Build and compile a regression model over categorical + numeric inputs.

    Categorical inputs are embedded (20 dims) and summarised twice: flattened
    through a dense layer, and average-pooled. Numeric inputs go through a
    dense layer. The three vectors are concatenated and fed to a
    dropout/dense stack ending in a single linear unit.

    Reads the module-level names ``cat_variables``, ``numeric_variables``
    and ``cat_map``.

    :return: model compiled with MAE loss and Adam(0.01).
    """
    input_cat = Input((len(cat_variables), ))
    input_num = Input((len(numeric_variables), ))

    embedded = Embedding(len(cat_map), 20)(input_cat)
    cat_flat = Dense(20, activation="relu")(Flatten()(embedded))
    cat_mean = GlobalAvgPool1D()(embedded)
    num_dense = Dense(20, activation="relu")(input_num)

    x = concatenate([cat_flat, cat_mean, num_dense])
    # Each hidden layer is preceded by 50% dropout.
    for width in (100, 50, 20):
        x = Dropout(0.5)(x)
        x = Dense(width, activation="relu")(x)
    x = Dropout(0.5)(x)
    x = Dense(1, activation="linear")(x)

    model = Model(inputs=[input_cat, input_num], outputs=x)
    model.compile(loss="mae", optimizer=Adam(0.01))
    model.summary()
    return model
def aggregate(self, input_1, input_2, num_dense=300, dropout_rate=0.1):
    """Pool two sequences and pass the result through two dense stages.

    Each input is reduced with global average and global max pooling; the
    pooled vectors are concatenated, batch-normalised and fed through two
    (Dense -> BatchNormalization -> Dropout) stages.

    :param input_1: first sequence tensor.
    :param input_2: second sequence tensor.
    :param num_dense: width of both dense layers.
    :param dropout_rate: dropout rate after each dense stage.
    :return: the resulting feature tensor.
    """
    pooled = [
        concatenate([GlobalAvgPool1D()(seq), GlobalMaxPool1D()(seq)])
        for seq in (input_1, input_2)
    ]
    x = BatchNormalization()(concatenate(pooled))
    for _ in range(2):
        x = Dense(num_dense, activation='relu')(x)
        x = BatchNormalization()(x)
        x = Dropout(dropout_rate)(x)
    return x
def siamois_seq(maxlen, max_features):
    """Build a siamese GRU model over two sequences with a 2-way softmax.

    Both inputs share the embedding, the recurrent layer and the two pooling
    layers. Each side is reduced to the concatenation of its max- and
    average-pooled GRU outputs; the two sides are merged with ``submult``.

    :param maxlen: length of each input sequence.
    :param max_features: vocabulary size passed to the embedding layer.
    :return: the (uncompiled) Keras model.
    """
    inp1 = Input(shape=(maxlen,))
    inp2 = Input(shape=(maxlen,))

    emb = Embedding(max_features, 256)
    com = CuDNNGRU(256, return_sequences=True)
    x1 = com(emb(inp1))
    x2 = com(emb(inp2))

    pool = GlobalMaxPool1D()
    avg = GlobalAvgPool1D()
    x1 = Concatenate()([pool(x1), avg(x1)])
    x2 = Concatenate()([pool(x2), avg(x2)])

    merge = submult(x1, x2)
    merge = Dropout(0.2)(merge)
    merge = Dense(512, activation='relu')(merge)
    merge = Dropout(0.2)(merge)
    preds = Dense(2, activation='softmax')(merge)

    model = Model(inputs=[inp1, inp2], outputs=preds)
    # summary() prints by itself and returns None; wrapping it in print()
    # only emitted a stray "None" line.
    model.summary()
    return model
def init_model(self, input_shape, num_classes, **kwargs):
    """Build, compile and store the Bi-LSTM + attention classifier.

    The compiled model is stored on ``self._model`` and ``self.is_init`` is
    set to True. ``kwargs`` is accepted but not used.

    :param input_shape: shape of one input sample (without batch axis).
    :param num_classes: number of softmax output units.
    """
    inputs = Input(shape=input_shape)
    # Input batch normalisation and an explicit tanh activation were tried
    # here and left disabled by the original author.
    recurrent = Bidirectional(
        CuDNNLSTM(96, name='blstm1', return_sequences=True),
        merge_mode='concat')
    features = SpatialDropout1D(0.1)(recurrent(inputs))
    # Self-attention: the same tensor is passed three times.
    features = Attention(8, 16)([features, features, features])

    pooled_max = GlobalMaxPool1D()(features)
    pooled_avg = GlobalAvgPool1D()(features)
    hidden = Concatenate(axis=-1)([pooled_max, pooled_avg])
    hidden = Dense(units=128, activation='elu')(hidden)
    hidden = Dense(units=64, activation='elu')(hidden)
    hidden = Dropout(rate=0.4)(hidden)
    outputs = Dense(units=num_classes, activation='softmax')(hidden)

    model = TFModel(inputs=inputs, outputs=outputs)
    optimizer = optimizers.Adam(
        lr=1e-3,  # spelled `learning_rate` in newer releases
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-08,
        decay=0.0002,
        amsgrad=True)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()

    self._model = model
    self.is_init = True
def se_fn(x, amplifying_ratio):
    """Compute per-feature gates in (0, 1) from a sequence tensor.

    The input is average-pooled over axis 1, reshaped to ``(1, features)``
    and passed through a ReLU dense layer of width
    ``features * amplifying_ratio`` followed by a sigmoid dense layer of
    width ``features``.

    :param x: sequence tensor whose last dimension is statically known.
    :param amplifying_ratio: multiplier for the hidden dense width.
    :return: tensor of sigmoid activations with a length-1 middle axis.
    """
    last_dim = x.shape[-1]
    # Older TensorFlow wraps static dimensions in a `Dimension` object with a
    # `.value` attribute; newer versions return a plain int. Accept both.
    num_features = getattr(last_dim, 'value', last_dim)

    x = GlobalAvgPool1D()(x)
    x = Reshape((1, num_features))(x)
    x = Dense(num_features * amplifying_ratio,
              activation='relu',
              kernel_initializer='glorot_uniform')(x)
    x = Dense(num_features,
              activation='sigmoid',
              kernel_initializer='glorot_uniform')(x)
    return x
def buildNNwithori():
    """Build and compile a Bi-GRU + Conv1D model with two extra inputs.

    The text input goes through a frozen pre-trained embedding, two stacked
    bidirectional GRUs and a convolution. Its average- and max-pooled
    features are concatenated with two 6-dimensional float inputs and mapped
    to 6 sigmoid outputs.

    Reads the module-level names ``MAX_SEQUENCE_LENGTH``, ``nb_words``,
    ``EMBEDDING_DIM`` and ``embedding_matrix``.

    :return: model compiled with binary cross-entropy and Adam (lr=1e-3).
    """
    comment_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    cv1_input = Input(shape=(6, ), dtype='float64')
    cv2_input = Input(shape=(6, ), dtype='float64')

    embedding_layer = Embedding(nb_words,
                                EMBEDDING_DIM,
                                weights=[embedding_matrix],
                                input_length=MAX_SEQUENCE_LENGTH,
                                trainable=False)
    embedded = embedding_layer(comment_input)
    # NOTE(review): spatial dropout is applied twice in a row (0.1 then
    # 0.2) - confirm this is intentional.
    embedded = SpatialDropout1D(0.1)(embedded)
    seq = SpatialDropout1D(0.2)(embedded)

    for _ in range(2):
        seq = Bidirectional(CuDNNGRU(128, return_sequences=True))(seq)
    seq = Dropout(0.3)(seq)
    seq = Conv1D(64, kernel_size=3, padding="same",
                 kernel_initializer="he_uniform")(seq)

    avg_pool = GlobalAvgPool1D()(seq)
    max_pool = GlobalMaxPool1D()(seq)
    merged = concatenate([avg_pool, max_pool, cv1_input, cv2_input])
    merged = Dropout(0.1)(merged)
    preds = Dense(6, activation='sigmoid')(merged)

    # Assemble and compile the model.
    model = Model(inputs=[comment_input, cv1_input, cv2_input],
                  outputs=preds)
    model.compile(loss='binary_crossentropy',
                  optimizer=keras.optimizers.Adam(lr=1e-3),
                  metrics=['accuracy'])
    return model
def conv_block(x, n, kernel_size):
    """Apply two ReLU convolutions and return three pooled summaries.

    :param x: input sequence tensor.
    :param n: number of filters of the first convolution.
    :param kernel_size: kernel size of both convolutions.
    :return: concatenation of the attention, average-pooled and max-pooled
        summaries of the convolved sequence.
    """
    # NOTE(review): the second convolution takes its width from the
    # module-level `n_filters`, not from `n` - confirm this is intended.
    for width in (n, n_filters):
        x = Conv1D(width, kernel_size, activation='relu')(x)

    summaries = [
        AttentionWithContext()(x),
        GlobalAvgPool1D()(x),
        GlobalMaxPool1D()(x),
    ]
    return concatenate(summaries)
def id_model(compile=True, lstm=True, verif=False):
    """Return the identification model.

    Leave ``compile`` false when the model is used as the base for
    verification; set ``verif`` true when training on verification data.

    :param compile: compile with categorical cross-entropy and Adam.
    :param lstm: use a bidirectional CuDNNLSTM; otherwise a CuDNNGRU.
    :param verif: use 1211 output classes instead of 1251.
    :return: the Keras model over inputs of shape (299, 26).
    """
    recurrent_cls = CuDNNLSTM if lstm else CuDNNGRU
    n_outputs = 1211 if verif else 1251

    inp = Input(shape=(299, 26))
    seq = SpatialDropout1D(.1)(inp)
    seq = Bidirectional(recurrent_cls(256, return_sequences=True))(seq)

    gmp = GlobalMaxPool1D()(seq)
    gap = GlobalAvgPool1D()(seq)
    feat = concatenate([gmp, gap])
    feat = Dropout(.5)(feat)
    feat = BatchNormalization()(feat)
    out = Dense(n_outputs, activation='softmax')(feat)

    model = Model(inp, out)
    if compile:
        model.compile(loss='categorical_crossentropy',
                      metrics=['acc', 'top_k_categorical_accuracy'],
                      optimizer='adam')
    return model
def build_model_bilstm_layers(self):
    """Build a three-layer bidirectional RNN classifier on BERT embeddings.

    The recurrent cell class is chosen from ``args.use_lstm`` and
    ``args.use_cudnn_cell``. The result is stored on ``self.model``; nothing
    is returned.
    """
    if args.use_lstm:
        layer_cell = CuDNNLSTM if args.use_cudnn_cell else LSTM
    else:
        layer_cell = CuDNNGRU if args.use_cudnn_cell else GRU

    # bert embedding
    # (taking only the first-token slice of the output was tried here and
    # left disabled by the original author)
    bert_inputs, bert_output = KerasBertEmbedding().bert_encode()

    # Three identical Bi-RNN + dropout stages.
    # A TimeDistributed(Dropout) was tried and did not work; the author
    # suspected the input had fewer than 3 dims.
    # NOTE(review): `args.keep_prob` is passed as the Dropout rate - confirm
    # it really holds a drop rate and not a keep probability.
    x = bert_output
    for _ in range(3):
        x = Bidirectional(
            layer_cell(units=args.units,
                       return_sequences=args.return_sequences,
                       kernel_regularizer=regularizers.l2(args.l2 * 0.1),
                       recurrent_regularizer=regularizers.l2(args.l2)))(x)
        x = Dropout(args.keep_prob)(x)

    # Concatenate average pooling and max pooling.
    avg_pool = GlobalAvgPool1D()(x)
    max_pool = GlobalMaxPool1D()(x)
    print(max_pool.shape)
    print(avg_pool.shape)
    concat = concatenate([avg_pool, max_pool])

    x = Dense(int(args.units / 4), activation="relu")(concat)
    x = Dropout(args.keep_prob)(x)

    # Final output layer (activation taken from args).
    dense_layer = Dense(args.label, activation=args.activation)(x)
    output_layers = [dense_layer]
    self.model = Model(bert_inputs, output_layers)
def __init__(self):
    """Build the eight-stage 1-D CNN and store it on ``self.model``.

    Every stage is Conv1D ('same' padding) -> LeakyReLU(0.2) ->
    [MaxPool1D(strides=4)] -> BatchNormalization -> [Dropout]. The network
    ends with global average pooling and a softmax over ``NB_CLASSES``.

    Reads the module-level names ``x_train``, ``dropout`` and
    ``NB_CLASSES``.
    """
    self.model = Sequential()

    # (filters, kernel_size, l2 strength or None, max-pool?, dropout or None)
    # A kernel_initializer='glorot_normal' on the first convolution was
    # tried and left disabled by the original author.
    stages = [
        (4, 9, 0.025, True, None),
        (8, 9, 0.05, True, None),
        (8, 9, 0.1, True, None),
        (16, 7, None, True, dropout / 2),
        (16, 7, None, True, dropout / 2),
        (32, 4, None, True, dropout),
        (32, 4, None, True, dropout),
        (64, 1, None, False, 0.75),
    ]

    for index, (filters, kernel_size, l2_strength, pool, drop_rate) \
            in enumerate(stages):
        conv_kwargs = {
            'filters': filters,
            'kernel_size': kernel_size,
            'padding': 'same',
        }
        if index == 0:
            # Only the first layer declares the input shape.
            conv_kwargs['input_shape'] = x_train.shape[1:]
        if l2_strength is not None:
            conv_kwargs['kernel_regularizer'] = l2(l2_strength)

        self.model.add(Conv1D(**conv_kwargs))
        self.model.add(LeakyReLU(alpha=0.2))
        if pool:
            self.model.add(MaxPool1D(strides=4))
        self.model.add(BatchNormalization())
        if drop_rate is not None:
            self.model.add(Dropout(drop_rate))

    self.model.add(GlobalAvgPool1D())
    self.model.add(Dense(NB_CLASSES, activation='softmax'))
def model_basic(num_frame, num_sing):
    """Build a five-stage 1-D CNN classifier over (num_frame, 128) inputs.

    Each stage is Conv1D -> BatchNormalization -> ReLU; the first four
    stages are followed by max pooling. The last stage is average-pooled
    over axis 1 and classified with a softmax layer.

    :param num_frame: length of the input sequence.
    :param num_sing: number of softmax output units.
    :return: the (uncompiled) Keras model.
    """
    pos_anchor = Input(shape=(num_frame, 128))

    # item model **audio**
    # (filters, kernel_size, pool_size or None)
    stages = (
        (128, 4, 4),
        (128, 4, 4),
        (128, 4, 4),
        (128, 2, 2),
        (256, 1, None),
    )

    hidden = pos_anchor
    for filters, kernel_size, pool_size in stages:
        hidden = Conv1D(filters, kernel_size, padding='same', use_bias=True,
                        kernel_regularizer=l2(1e-5),
                        kernel_initializer='he_uniform')(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = Activation('relu')(hidden)
        if pool_size is not None:
            hidden = MaxPool1D(pool_size=pool_size)(hidden)

    # NOTE(review): the original instantiates this dropout layer but never
    # applies it; it is kept (still unused) so behaviour is unchanged.
    drop1 = Dropout(0.5)

    pos_anchor_sem = GlobalAvgPool1D()(hidden)
    output = Dense(num_sing, activation='softmax')(pos_anchor_sem)
    return Model(inputs=pos_anchor, outputs=output)
def BPNet(input_length=1000, output_length=1000, n_filters=64,
          kernel_size=21, n_dilated_layers=6, tconv_kernel_size=75,
          lr=0.004):
    """Build and compile a dilated-convolution model with two outputs.

    The sequence input goes through one convolution and
    ``n_dilated_layers`` dilated convolutions (dilation ``2**i``), each fed
    with the sum of all previous layer outputs. The final sum drives two
    heads: a count head (``task0_logcount``) and a profile head
    (``task0_profile``), each concatenated with its control input.

    :param input_length: length of the 4-channel sequence input.
    :param output_length: length of the 2-channel control-profile input.
    :param n_filters: filters in every convolution of the body.
    :param kernel_size: kernel size of the first convolution.
    :param n_dilated_layers: number of dilated convolutions.
    :param tconv_kernel_size: kernel length of the transposed convolution.
    :param lr: NOTE(review): accepted but never used - the model is compiled
        with the string 'adam'; confirm whether it should set the rate.
    :return: model compiled with MSE and ``MultichannelMultinomialNLL(2)``.
    """
    sequence = Input(shape=(input_length, 4), name="sequence")
    control_counts = Input(shape=(1, ), name="control_logcount")
    control_profile = Input(shape=(output_length, 2), name="control_profile")

    first = Conv1D(n_filters, kernel_size=kernel_size, padding='same',
                   activation='relu')(sequence)
    layers = [first]
    for i in range(1, n_dilated_layers + 1):
        # With a single tensor so far there is nothing to add yet.
        residual = layers[0] if len(layers) == 1 else add(layers)
        layers.append(
            Conv1D(n_filters, kernel_size=3, padding='same',
                   activation='relu', dilation_rate=2**i)(residual))
    layer_sum = add(layers)

    # Predict counts
    average_conv = GlobalAvgPool1D()(layer_sum)
    x_with_count_bias = concatenate([average_conv, control_counts], axis=-1)
    y_count = Dense(2, name="task0_logcount")(x_with_count_bias)

    # Predict profile: insert a dummy axis so a 2-D transposed convolution
    # can be applied, then remove it again.
    expanded = Reshape((-1, 1, n_filters))(layer_sum)
    x_profile = Conv2DTranspose(2, kernel_size=(tconv_kernel_size, 1),
                                padding='same')(expanded)
    x_profile = Reshape((-1, 2))(x_profile)
    x_with_profile_bias = concatenate([x_profile, control_profile], axis=-1)
    y_profile = Conv1D(2, kernel_size=1,
                       name="task0_profile")(x_with_profile_bias)

    model = Model(inputs=[sequence, control_counts, control_profile],
                  outputs=[y_count, y_profile])
    model.compile('adam',
                  loss=['mse', MultichannelMultinomialNLL(2)],
                  loss_weights=[1, 1])
    return model
def esim(maxlen, max_features, lstm_dim=32, dense_dim=64, dense_dropout=0.5):
    """Build a two-input Bi-LSTM attention model with a 2-way softmax.

    Both inputs share the embedding, the encoding Bi-LSTM and the
    composition Bi-LSTM.

    :param maxlen: length of each input sequence.
    :param max_features: vocabulary size passed to the embedding layer.
    :param lstm_dim: units of each LSTM direction.
    :param dense_dim: width of the hidden classifier layer.
    :param dense_dropout: dropout rate after the hidden classifier layer.
    :return: the (uncompiled) Keras model.
    """
    inp1 = Input(shape=(maxlen,))
    inp2 = Input(shape=(maxlen,))
    emb = Embedding(max_features, 256)
    emb1 = emb(inp1)
    emb2 = emb(inp2)

    # Encode
    encode = Bidirectional(CuDNNLSTM(lstm_dim, return_sequences=True))
    encoded1 = encode(emb1)
    encoded2 = encode(emb2)

    # Attention
    att1, att2 = soft_attention_alignment(encoded1, encoded2)

    # Compose: each side is combined with the other side's alignment.
    comb1 = Concatenate()([encoded1, att2, submult(encoded1, att2)])
    comb2 = Concatenate()([encoded2, att1, submult(encoded2, att1)])
    compose = Bidirectional(CuDNNLSTM(lstm_dim, return_sequences=True))
    compare1 = compose(comb1)
    compare2 = compose(comb2)

    # Aggregate
    agg1 = apply_multiple(compare1, [GlobalAvgPool1D(), GlobalMaxPool1D()])
    agg2 = apply_multiple(compare2, [GlobalAvgPool1D(), GlobalMaxPool1D()])

    # Merge
    merge = Concatenate()([agg1, agg2])
    dense = Dense(dense_dim, activation='relu')(merge)
    dense = Dropout(dense_dropout)(dense)
    preds = Dense(2, activation='softmax')(dense)

    model = Model(inputs=[inp1, inp2], outputs=preds)
    # summary() prints by itself and returns None; wrapping it in print()
    # only emitted a stray "None" line.
    model.summary()
    return model
def build_model(self):
    """Build the matching network on ``self.Q1_emb`` and ``self.Q2_emb``.

    Both sides share the encoding Bi-GRU and the composition Bi-GRU.

    :return: the sigmoid prediction tensor (no Model is created here).
    """
    encoder = Bidirectional(GRU(300, return_sequences=True, dropout=0.2))
    enc_q1 = encoder(self.Q1_emb)  # (?, len, 600)
    enc_q2 = encoder(self.Q2_emb)  # (?, len, 600)

    q1_aligned, q2_aligned = soft_attention_alignment(enc_q1, enc_q2)

    # Each side is combined with the other side's alignment.
    q1_combined = Concatenate()(
        [enc_q1, q2_aligned, submult(enc_q1, q2_aligned)])
    q2_combined = Concatenate()(
        [enc_q2, q1_aligned, submult(enc_q2, q1_aligned)])

    composer = Bidirectional(GRU(300, return_sequences=True, dropout=0.2))
    q1_compare = composer(q1_combined)
    q2_compare = composer(q2_combined)

    q1_rep = apply_multiple(
        q1_compare, [GlobalAvgPool1D(), GlobalMaxPool1D()])
    q2_rep = apply_multiple(
        q2_compare, [GlobalAvgPool1D(), GlobalMaxPool1D()])

    hidden = Concatenate()([q1_rep, q2_rep])
    hidden = Dense(600, activation='elu')(hidden)
    hidden = Dropout(rate=0.5)(hidden)
    return Dense(1, activation='sigmoid')(hidden)
def buildmodel():
    """Build and compile a two-embedding recurrent model with 6 outputs.

    The same integer input is embedded twice with frozen matrices
    (``embedding_matrix_glove`` and ``embedding_matrix_fasttext``). The
    first embedding feeds a Bi-GRU; the second feeds a Bi-LSTM and a Bi-GRU
    built with ``go_backwards=True``. All recurrent outputs are
    concatenated, projected per timestep, max- and average-pooled and mapped
    to 6 sigmoid units.

    Reads the module-level names ``MAX_SEQUENCE_LENGTH`` and ``nb_words``.

    :return: model compiled with binary cross-entropy and Adam (lr=1e-3).
    """
    comment_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')

    # First branch (spatial dropout on the embedding).
    glove_layer = Embedding(nb_words,
                            300,
                            weights=[embedding_matrix_glove],
                            input_length=MAX_SEQUENCE_LENGTH,
                            trainable=False)
    glove_seq = SpatialDropout1D(0.1)(glove_layer(comment_input))

    # Second branch (no spatial dropout; it was tried and left disabled).
    fasttext_layer = Embedding(nb_words,
                               300,
                               weights=[embedding_matrix_fasttext],
                               input_length=MAX_SEQUENCE_LENGTH,
                               trainable=False)
    fasttext_seq = fasttext_layer(comment_input)

    glove_rnn = Bidirectional(
        CuDNNGRU(32, return_sequences=True))(glove_seq)
    glove_rnn = Dropout(0.1)(glove_rnn)

    ft_lstm = Bidirectional(
        CuDNNLSTM(64, return_sequences=True))(fasttext_seq)
    ft_lstm = Dropout(0.1)(ft_lstm)
    ft_gru = Bidirectional(
        CuDNNGRU(64, return_sequences=True,
                 go_backwards=True))(fasttext_seq)
    ft_rnn = concatenate([ft_lstm, ft_gru])
    ft_rnn = Dropout(0.2)(ft_rnn)

    seq = concatenate([glove_rnn, ft_rnn])
    seq = TimeDistributed(Dense(128, activation='tanh'))(seq)

    # Max over axis 1 via a Lambda, average via the stock pooling layer.
    pooled_max = Lambda(lambda t: K.max(t, axis=1),
                        output_shape=(128, ))(seq)
    pooled_avg = GlobalAvgPool1D()(seq)
    merged = concatenate([pooled_max, pooled_avg])
    merged = Dropout(0.1)(merged)
    preds = Dense(6, activation='sigmoid')(merged)

    # Assemble and compile the model.
    model = Model(inputs=[comment_input], outputs=preds)
    model.compile(loss='binary_crossentropy',
                  optimizer=keras.optimizers.Adam(lr=1e-3),
                  metrics=['accuracy'])
    return model
def build_model(
        embedding_matrix,
        n_words,
        n_char,
        wdim=200,
        cdim=25,
        rdim=1,
        entdim=100,
):
    """Build and compile a five-input binary classifier.

    Three sequence inputs (an int matrix, an int 3-D tensor and a second
    int matrix) are embedded, concatenated and convolved; the pooled result
    is merged step by step with two dense side inputs.

    :param embedding_matrix: frozen weights for the first embedding.
    :param n_words: vocabulary size of the first embedding.
    :param n_char: vocabulary size of the second embedding.
    :param wdim: output dimension of the first embedding.
    :param cdim: output dimension of the second embedding and units of the
        per-timestep Bi-LSTM applied to it.
    :param rdim: output dimension of the third (2-entry) embedding.
    :param entdim: size of the first dense side input.
    :return: model compiled with nadam and binary cross-entropy.
    """
    in1 = Input(batch_shape=(None, None), dtype='int32')
    in2 = Input(batch_shape=(None, None, None), dtype='int32')
    in3 = Input(batch_shape=(None, None), dtype='int32')
    in5 = Input(shape=(entdim, ))
    in6 = Input(shape=(1, ))

    wemb1 = Embedding(n_words,
                      wdim,
                      weights=[embedding_matrix],
                      trainable=False,
                      mask_zero=False)(in1)
    cemb1 = Embedding(n_char, cdim, mask_zero=False)(in2)
    # Reduce the innermost axis of the second input with a Bi-LSTM applied
    # at every timestep.
    cemb1 = TimeDistributed(Bidirectional(LSTM(cdim)))(cemb1)
    remb1 = Embedding(2, rdim, mask_zero=False)(in3)

    seq = Concatenate()([wemb1, cemb1, remb1])
    seq = Dropout(0.5)(seq)
    seq = SeparableConv1D(32, kernel_size=3, padding="same",
                          activation="relu")(seq)
    pooled = Concatenate()([GlobalAvgPool1D()(seq), GlobalMaxPool1D()(seq)])

    side_a = Dense(512, activation="relu")(in5)
    side_b = Dense(1, activation="relu")(in6)

    out = Dense(512, activation="relu")(pooled)
    # Merge one side feature at a time, each followed by a dense layer.
    for side in (side_a, side_b):
        out = Concatenate()([out, side])
        out = Dense(512, activation="relu")(out)
    out = Dropout(0.5)(out)
    out = Dense(1, activation="sigmoid")(out)

    model = Model([in1, in2, in3, in5, in6], out)
    model.compile(optimizer="nadam",
                  loss="binary_crossentropy",
                  metrics=["acc"])
    return model
def get_rnn(model_flag, units, num_layers, dropout, reg, filter_nums):
    """Build the RNN stack that maps embedded input to a representation.

    The stack consists of bidirectional GRUs and ends with either the final
    RNN output, average pooling, or pooling by a CNN.

    :param model_flag: 'baseline' (last RNN returns its final output),
        'avg' (global average pooling over the sequence output) or 'cnn'
        (the sequence output is passed to ``get_conv_model``).
    :param units: units of every GRU direction.
    :param num_layers: total number of bidirectional GRU layers.
    :param dropout: input dropout of every GRU.
    :param reg: L2 strength for kernel and recurrent weights.
    :param filter_nums: forwarded to ``get_conv_model`` for 'cnn'.
    :return: Keras model from a ``(None, 300)`` input to the representation.
    :raises ValueError: if ``model_flag`` is not one of the three values.
    """
    # define return_sequences for the last rnn
    if model_flag == 'baseline':
        return_sequences = False
    elif model_flag in ('avg', 'cnn'):
        return_sequences = True
    else:
        # Raising a bare string is itself a TypeError; raise a real exception.
        raise ValueError('invalid model_flag.')

    embedding_input = Input([None, 300])
    rep = embedding_input

    # Loop over the rnn stack, then do a final run over the last rnn.
    # return_sequences may be true or false in the last rnn depending on the
    # model, so it is defined separately.
    for i in range(num_layers - 1):
        rep = Bidirectional(
            GRU(units,
                name='RNN' + str(i),
                dropout=dropout,
                return_sequences=True,
                kernel_regularizer=l2(reg),
                recurrent_regularizer=l2(reg)))(rep)

    # The original reused the loop variable for this name, which is undefined
    # when the loop never runs (num_layers <= 1). The index below reproduces
    # the historical name for deeper stacks and falls back to 0 otherwise.
    # NOTE(review): for num_layers >= 2 this repeats the previous layer's
    # name; kept for backward compatibility - confirm before changing.
    last_index = max(num_layers - 2, 0)
    rep = Bidirectional(
        GRU(units,
            name='RNN' + str(last_index),
            dropout=dropout,
            return_sequences=return_sequences,
            kernel_regularizer=l2(reg),
            recurrent_regularizer=l2(reg)))(rep)

    # define pooling method
    if model_flag == 'baseline':
        output = rep
    elif model_flag == 'avg':
        output = GlobalAvgPool1D()(rep)
    else:  # 'cnn'
        cnn = get_conv_model(units, filter_nums)
        output = cnn(rep)

    RNN = Model(inputs=embedding_input, outputs=output)
    return RNN
def get_model(use_embeddings, dilated_convs, pooling, filter_sizes,
              num_filters, use_batch_normalization):
    """Build and compile the convolutional model and record its parameters.

    :param use_embeddings: project the input with a frozen, bias-free dense
        layer initialised from ``M1``.
    :param dilated_convs: recorded in the returned parameters only; not
        otherwise read in this function.
    :param pooling: 'max', 'avg' or 'flatten'; any other value leaves the
        convolution output unreduced.
    :param filter_sizes: forwarded to ``get_conv_stack``.
    :param num_filters: forwarded to ``get_conv_stack``.
    :param use_batch_normalization: add batch normalisation before the
        convolution stack and before the hidden activation.
    :return: ``(model, params)`` where ``params`` maps each argument name to
        the value it was called with.
    """
    # Snapshot the call arguments before any other local is created.
    # dict.items() works on Python 2 and 3; iteritems() is Python 2 only.
    params = dict(locals())
    params.pop('weights', None)

    x = Input(shape=(MAX_CHORDS, NUM_NOTES), dtype='float32')
    if use_embeddings:
        y = Dense(NUM_DIM, activation='linear', use_bias=False,
                  weights=[M1], trainable=False)(x)
    else:
        y = x
    if use_batch_normalization:
        y = BatchNormalization()(y)

    y = get_conv_stack(y, num_filters, filter_sizes, 'relu', 0.0001, 0.5)

    if pooling == 'max':
        y = GlobalMaxPool1D()(y)
    elif pooling == 'avg':
        y = GlobalAvgPool1D()(y)
    elif pooling == 'flatten':
        y = Flatten()(y)

    y = Dropout(0.5)(y)
    y = Dense(100)(y)
    if use_batch_normalization:
        y = BatchNormalization()(y)
    y = Activation('relu')(y)
    y = Dropout(0.5)(y)
    y = Dense(MAX_LABELS, activation='sigmoid')(y)

    model = Model(x, y)
    adam = Adam(lr=c.lr)
    model.compile(loss='categorical_crossentropy', optimizer=adam,
                  metrics=c.metrics)
    return (model, params)
def get_me_my_model():
    """Build a four-convolution 1-D CNN with a 3-way softmax output.

    The input shape is taken from the module-level ``x_train``.

    :return: the (uncompiled) Sequential model.
    """
    return Sequential([
        Conv1D(filters=4, kernel_size=9, activation='relu',
               input_shape=x_train.shape[1:],
               kernel_regularizer=l2(0.025)),
        MaxPool1D(strides=4),
        BatchNormalization(),

        Conv1D(filters=16, kernel_size=9, activation='relu'),
        MaxPool1D(strides=4),
        BatchNormalization(),
        Dropout(0.25),

        Conv1D(filters=64, kernel_size=4, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),

        Conv1D(filters=32, kernel_size=1, activation='relu'),
        BatchNormalization(),
        Dropout(0.75),

        GlobalAvgPool1D(),
        Dense(3, activation='softmax'),
    ])
def getModel1(input_shape,
              classes,
              num_words,
              emb_size,
              emb_matrix,
              emb_dropout=0.5,
              attention=0,
              dense=False,
              emb_trainable=False,
              gru=True):
    """Build a bidirectional RNN classifier over pre-trained embeddings.

    The features fed to the classifier are the max-pooled and average-pooled
    RNN outputs plus the final hidden state of each direction.

    :param input_shape: length of the integer input sequence.
    :param classes: number of softmax output units.
    :param num_words: vocabulary size of the embedding.
    :param emb_size: embedding dimension.
    :param emb_matrix: initial embedding weights.
    :param emb_dropout: spatial dropout rate applied to the embeddings.
    :param attention: accepted but not used by this function.
    :param dense: add a 32-unit ReLU layer with dropout before the output.
    :param emb_trainable: whether the embedding weights are trainable.
    :param gru: use CuDNNGRU; otherwise CuDNNLSTM.
    :return: the (uncompiled) Keras model.
    """
    x_input = Input(shape=(input_shape, ))
    emb = Embedding(num_words,
                    emb_size,
                    weights=[emb_matrix],
                    trainable=emb_trainable,
                    name='embs')(x_input)
    emb = SpatialDropout1D(emb_dropout)(emb)

    cell = CuDNNGRU if gru else CuDNNLSTM
    outputs = Bidirectional(
        cell(100, return_sequences=True, return_state=True))(emb)
    rnn, states = outputs[0], outputs[1:]
    # With return_state=True the wrapper returns the forward states followed
    # by the backward states: one per direction for a GRU, two (hidden and
    # cell) for an LSTM. Unpacking three values therefore failed for the
    # LSTM. Take the first state of each half, i.e. the hidden states.
    half = len(states) // 2
    rnn_fw, rnn_bw = states[0], states[half]

    rnn_max = GlobalMaxPool1D()(rnn)
    rnn_avg = GlobalAvgPool1D()(rnn)
    rnn_last = concatenate([rnn_fw, rnn_bw])
    x = concatenate([rnn_max, rnn_avg, rnn_last])

    if dense:
        x = Dense(32, activation='relu')(x)
        x = Dropout(0.3)(x)
    x_output = Dense(classes, activation='softmax')(x)
    return Model(inputs=x_input, outputs=x_output)
def se_fn_lstm(x, amplifying_ratio, idx):
    """Compute per-feature gates in (0, 1) using an LSTM and two dense layers.

    The absolute value of the input is average-pooled over axis 1, reshaped
    to ``(1, features)`` and passed through an LSTM, a ReLU dense layer of
    width ``features * amplifying_ratio`` and a sigmoid dense layer of width
    ``features``.

    :param x: sequence tensor whose last dimension is statically known.
    :param amplifying_ratio: multiplier for the hidden dense width.
    :param idx: suffix that makes the layer names of this block unique.
    :return: tensor of sigmoid activations with a length-1 middle axis.
    """
    last_dim = x.shape[-1]
    # Older TensorFlow wraps static dimensions in a `Dimension` object with a
    # `.value` attribute; newer versions return a plain int. Accept both.
    num_features = getattr(last_dim, 'value', last_dim)

    x = Activation(K.abs)(x)
    x = GlobalAvgPool1D()(x)
    x = Reshape((1, num_features))(x)
    # Author's note: for model AET_Convolution_16, change the LSTM units to
    # num_features * amplifying_ratio.
    x = LSTM(num_features,
             activation='relu',
             stateful=False,
             return_sequences=True,
             dropout=0.1,
             recurrent_dropout=0.1,
             name='se_lstm1_%s' % idx)(x)
    x = Dense(num_features * amplifying_ratio,
              activation='relu',
              kernel_initializer='glorot_uniform',
              name='se_dense1_%s' % idx)(x)
    x = Dense(num_features,
              activation='sigmoid',
              kernel_initializer='glorot_uniform',
              name='se_dense2_%s' % idx)(x)
    return x
def esim_word_char(embedding_matrix, char_embedding_matrix, config):
    """Build and compile a word-level + char-level sentence-pair model.

    Two structurally identical branches (one on word ids, one on char ids)
    each embed both questions, encode them with a shared bidirectional RNN,
    align them, compose them with a second shared bidirectional RNN, pool,
    and build ``[q1, q2, |q1 - q2|, q1 * q2]`` features. The two branch
    outputs are merged with a dense projection of an extra feature vector
    and classified with one sigmoid unit.

    :param embedding_matrix: initial weights of the word embedding.
    :param char_embedding_matrix: initial weights of the char embedding.
    :param config: mapping read for the keys 'rnn', 'gpu',
        'rnn_output_size', 'max_length', 'char_max_length',
        'embed_trainable', 'spatial_dropout_rate', 'dense_dim',
        'feature_length', 'dense_dropout', 'optimizer' and 'learning_rate'.
    :return: model compiled with binary cross-entropy and the ``f1`` metric.
    """
    # The four recurrent layers are created up front; each one is shared by
    # the q1 and q2 sides of its branch.
    # NOTE(review): any config other than rnn == 'gru' with gpu truthy falls
    # back to CuDNNLSTM, including gpu falsy - confirm that is intended.
    if config['rnn'] == 'gru' and config['gpu']:
        word_encode = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        word_compose = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        char_encode = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        char_compose = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
    else:
        word_encode = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'], return_sequences=True))
        word_compose = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'], return_sequences=True))
        char_encode = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'], return_sequences=True))
        char_compose = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'], return_sequences=True))

    # ---- word branch ----
    q1 = Input(shape=(config['max_length'], ), dtype='int32', name='q1_input')
    q2 = Input((config['max_length'], ), dtype='int32', name='q2_input')
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                trainable=config['embed_trainable'],
                                weights=[embedding_matrix]
                                # mask_zero=True
                                )
    q1_embed = embedding_layer(q1)
    q2_embed = embedding_layer(q2)
    # Original note: "bsz, 1, emb_dims".
    # NOTE(review): the middle axis is presumably max_length - confirm.
    # Each side gets its own normalisation layer (not shared).
    q1_embed = BatchNormalization(axis=2)(q1_embed)
    q2_embed = BatchNormalization(axis=2)(q2_embed)
    q1_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q1_embed)
    q2_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q2_embed)

    q1_encoded = word_encode(q1_embed)
    q2_encoded = word_encode(q2_embed)
    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)
    # Each side is combined with the other side's alignment.
    q1_combined = Concatenate()(
        [q1_encoded, q2_aligned,
         submult(q1_encoded, q2_aligned)])
    q2_combined = Concatenate()(
        [q2_encoded, q1_aligned,
         submult(q2_encoded, q1_aligned)])
    # (dropout on the combined tensors was tried and left disabled)
    q1_compare = word_compose(q1_combined)
    q2_compare = word_compose(q2_combined)

    # Aggregate
    q1_rep = apply_multiple(q1_compare,
                            [GlobalAvgPool1D(),
                             GlobalMaxPool1D()])
    q2_rep = apply_multiple(q2_compare,
                            [GlobalAvgPool1D(),
                             GlobalMaxPool1D()])

    # Classifier features: absolute difference and element-wise product.
    sub_rep = Lambda(lambda x: K.abs(x[0] - x[1]))([q1_rep, q2_rep])
    mul_rep = Lambda(lambda x: x[0] * x[1])([q1_rep, q2_rep])
    # NOTE(review): this `merged` is rebuilt identically after the char
    # branch, so the layer created here ends up unused.
    merged = Concatenate()([q1_rep, q2_rep, sub_rep, mul_rep])

    # ---- char branch (same structure as the word branch) ----
    q1_char = Input(shape=(config['char_max_length'], ),
                    dtype='int32',
                    name='q1_char_input')
    q2_char = Input((config['char_max_length'], ),
                    dtype='int32',
                    name='q2_char_input')
    char_embedding_layer = Embedding(char_embedding_matrix.shape[0],
                                     char_embedding_matrix.shape[1],
                                     trainable=config['embed_trainable'],
                                     weights=[char_embedding_matrix]
                                     # mask_zero=True
                                     )
    q1_embed_char = char_embedding_layer(q1_char)
    q2_embed_char = char_embedding_layer(q2_char)
    # Original note: "bsz, 1, emb_dims" (see the review note above).
    q1_embed_char = BatchNormalization(axis=2)(q1_embed_char)
    q2_embed_char = BatchNormalization(axis=2)(q2_embed_char)
    q1_embed_char = SpatialDropout1D(
        config['spatial_dropout_rate'])(q1_embed_char)
    q2_embed_char = SpatialDropout1D(
        config['spatial_dropout_rate'])(q2_embed_char)

    q1_encoded_char = char_encode(q1_embed_char)
    q2_encoded_char = char_encode(q2_embed_char)
    q1_aligned_char, q2_aligned_char = soft_attention_alignment(
        q1_encoded_char, q2_encoded_char)
    q1_combined_char = Concatenate()([
        q1_encoded_char, q2_aligned_char,
        submult(q1_encoded_char, q2_aligned_char)
    ])
    q2_combined_char = Concatenate()([
        q2_encoded_char, q1_aligned_char,
        submult(q2_encoded_char, q1_aligned_char)
    ])
    # (dropout on the combined tensors was tried and left disabled)
    q1_compare_char = char_compose(q1_combined_char)
    q2_compare_char = char_compose(q2_combined_char)

    # Aggregate
    q1_rep_char = apply_multiple(
        q1_compare_char,
        [GlobalAvgPool1D(), GlobalMaxPool1D()])
    q2_rep_char = apply_multiple(
        q2_compare_char,
        [GlobalAvgPool1D(), GlobalMaxPool1D()])

    # Classifier features for the char branch.
    sub_rep_char = Lambda(lambda x: K.abs(x[0] - x[1]))(
        [q1_rep_char, q2_rep_char])
    mul_rep_char = Lambda(lambda x: x[0] * x[1])([q1_rep_char, q2_rep_char])

    # ---- merge both branches with the extra feature vector ----
    merged = Concatenate()([q1_rep, q2_rep, sub_rep, mul_rep])
    merged_char = Concatenate()(
        [q1_rep_char, q2_rep_char, sub_rep_char, mul_rep_char])

    dense = BatchNormalization()(merged)
    dense = Dense(config['dense_dim'], activation='elu')(dense)
    dense_char = BatchNormalization()(merged_char)
    dense_char = Dense(config['dense_dim'], activation='elu')(dense_char)

    feature_input = Input(shape=(config['feature_length'], ))
    feature_dense = BatchNormalization()(feature_input)
    feature_dense = Dense(config['dense_dim'],
                          activation='relu')(feature_dense)

    dense = Concatenate()([dense, dense_char, feature_dense])
    dense = BatchNormalization()(dense)
    dense = Dropout(config['dense_dropout'])(dense)
    dense = Dense(config['dense_dim'], activation='elu')(dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(config['dense_dropout'])(dense)
    predictions = Dense(1, activation='sigmoid')(dense)

    model = Model(inputs=[q1, q2, q1_char, q2_char, feature_input],
                  outputs=predictions)
    # Resolve the optimizer by name, then overwrite its learning rate.
    opt = optimizers.get(config['optimizer'])
    K.set_value(opt.lr, config['learning_rate'])
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=[f1])
    return model
def build(self):
    """Assemble the sentence-pair matching graph and return its final output.

    Both inputs (``self.a`` and the result of ``self.get_b()``) are run
    through a shared embedding, a shared bidirectional LSTM encoder, a soft
    cross-alignment step, a shared bidirectional LSTM composition layer and
    global average/max pooling.  The two pooled vectors each pass through
    the same dropout layer and are then handed, as a two-element list, to
    the callable returned by ``self.cosine()``.

    Returns:
        The output tensor of the final ``Lambda`` layer, whose output shape
        is declared as ``(None, 1)``.
    """
    sent_a = self.a
    sent_b = self.get_b()
    cfg = self.config

    # Pre-trained matrix is read from "<embedding_path>/<level>_level/<file>".
    emb_matrix = np.load(
        os.path.join(cfg.embedding_path, cfg.level + '_level',
                     cfg.embedding_file))
    shared_embedding = Embedding(
        input_dim=emb_matrix.shape[0],
        output_dim=emb_matrix.shape[-1],
        weights=[emb_matrix],
        name='embedding_layer',
        trainable=True,
    )
    emb_a = shared_embedding(sent_a)
    emb_b = shared_embedding(sent_b)

    if cfg.level == 'word_level':
        # Word-level runs additionally append a per-word character feature
        # produced by the layer that self.char_embedding() returns.
        # NOTE(review): the two character inputs below are locals and are
        # not stored or returned here -- confirm how the caller feeds them.
        char_shape = (cfg.max_len, cfg.char_per_word)
        char_in_a = Input(shape=char_shape, dtype='int32', name='a_char_base')
        char_in_b = Input(shape=char_shape, dtype='int32', name='b_char_base')
        char_matrix = np.load(
            os.path.join(cfg.embedding_path, 'char_level',
                         cfg.embedding_file))
        char_embedder = self.char_embedding(char_matrix)
        emb_a = concatenate([emb_a, char_embedder(char_in_a)])
        emb_b = concatenate([emb_b, char_embedder(char_in_b)])

    # One encoder instance, so both sentences share the same weights.
    encoder = Bidirectional(CuDNNLSTM(300, return_sequences=True))
    enc_a = encoder(emb_a)
    enc_b = encoder(emb_b)

    # Pairwise dot-product scores between every step of a and every step of b
    # (presumably shaped (batch, len_a, len_b) -- TODO confirm).
    scores = Dot(axes=-1)([enc_a, enc_b])
    weights_b = Lambda(lambda x: softmax(x, axis=1),
                       output_shape=lambda x: x)(scores)
    swap_axes = Permute((2, 1))
    weights_a = swap_axes(
        Lambda(lambda x: softmax(x, axis=2),
               output_shape=lambda x: x)(scores))
    aligned_a = Dot(axes=1)([weights_a, enc_b])
    aligned_b = Dot(axes=1)([weights_b, enc_a])

    # Subtraction is expressed as Add()([x, -y]) through a shared negation layer.
    negate = Lambda(lambda x: -x, output_shape=lambda x: x)
    diff_a = Add()([enc_a, negate(aligned_a)])
    prod_a = Multiply()([enc_a, aligned_a])
    diff_b = Add()([enc_b, negate(aligned_b)])
    prod_b = Multiply()([enc_b, aligned_b])

    enhanced_a = concatenate([enc_a, aligned_a, diff_a, prod_a], axis=-1)
    enhanced_b = concatenate([enc_b, aligned_b, diff_b, prod_b], axis=-1)

    # Composition layer, again shared between the two sides.
    composer = Bidirectional(CuDNNLSTM(300, return_sequences=True))
    composed_a = composer(enhanced_a)
    composed_b = composer(enhanced_b)

    max_a = GlobalMaxPool1D()(composed_a)
    max_b = GlobalMaxPool1D()(composed_b)
    avg_a = GlobalAvgPool1D()(composed_a)
    avg_b = GlobalAvgPool1D()(composed_b)
    pooled_a = concatenate([avg_a, max_a], axis=-1)
    pooled_b = concatenate([avg_b, max_b], axis=-1)

    similarity_fn = self.cosine()
    shared_dropout = Dropout(cfg.dropout)
    scorer = Lambda(similarity_fn, output_shape=lambda _: (None, 1))
    return scorer([shared_dropout(pooled_a), shared_dropout(pooled_b)])
def build_model(self):
    """Build the character-level matching branch and return its prediction.

    ``self.Q1_char_emb`` and ``self.Q2_char_emb`` are encoded by one shared
    bidirectional GRU, aligned with ``minus_soft_attention_alignment``,
    enriched with ``submult`` features, passed through a second shared
    bidirectional GRU and pooled with global average and max pooling via
    ``apply_multiple``.  The pooled vectors are concatenated with a dense
    projection of the 4-feature "magic" input and fed to the classifier.

    Side effect:
        Sets ``self.magic`` to a new ``Input`` named ``'magic_input'``.

    Returns:
        The output tensor of the final single-unit sigmoid ``Dense`` layer.
    """
    # NOTE: a word-level branch with the same layout (run on self.Q1_emb /
    # self.Q2_emb) was previously present here in commented-out form; only
    # the character-level branch is active.
    encoder = Bidirectional(GRU(300, return_sequences=True, dropout=0.2))
    enc_q1 = encoder(self.Q1_char_emb)  # (?, len, 600)
    enc_q2 = encoder(self.Q2_char_emb)  # (?, len, 600)

    # The "minus" alignment variant is used here rather than
    # soft_attention_alignment.
    q1_aligned, q2_aligned = minus_soft_attention_alignment(enc_q1, enc_q2)

    # Each side is combined with the other side's aligned representation.
    join_q1 = Concatenate()
    q1_features = join_q1(
        [enc_q1, q2_aligned, submult(enc_q1, q2_aligned)])
    join_q2 = Concatenate()
    q2_features = join_q2(
        [enc_q2, q1_aligned, submult(enc_q2, q1_aligned)])

    composer = Bidirectional(GRU(300, return_sequences=True, dropout=0.2))
    q1_composed = composer(q1_features)
    q2_composed = composer(q2_features)

    q1_pooled = apply_multiple(
        q1_composed, [GlobalAvgPool1D(), GlobalMaxPool1D()])
    q2_pooled = apply_multiple(
        q2_composed, [GlobalAvgPool1D(), GlobalMaxPool1D()])

    # Extra hand-crafted features enter through their own input tensor.
    self.magic = Input(shape=(4, ), dtype='float32', name='magic_input')
    magic_features = BatchNormalization()(self.magic)
    magic_features = Dense(64, activation='elu')(magic_features)

    merged = Concatenate()([q1_pooled, q2_pooled, magic_features])

    hidden = Dense(600, activation='elu')(merged)
    hidden = Dropout(rate=0.5)(hidden)
    return Dense(1, activation='sigmoid')(hidden)