def build_doc2vec_model(conf, dp):
    n_terms = len(dp.idx2word)

    # initialize parameters (embeddings)
    word_embed_data = np.array(dp.word_embed)
    item_embed_data = np.random.rand(dp.get_item_size(), conf.dim_word)
    print(dp.get_item_size())
    print("finish data processing")

    # define model
    word_input = Input(shape=(1,), dtype="int32", name="word_idx")
    item_pos_input = Input(shape=(1,), dtype="int32", name="item_pos_idx")
    item_neg_input = Input(shape=(1,), dtype="int32", name="item_neg_idx")

    word_embed = Embedding(output_dim=conf.dim_word, input_dim=n_terms, input_length=1,
                           name="word_embed", weights=[word_embed_data], trainable=False)
    item_embed = Embedding(output_dim=conf.dim_word, input_dim=dp.get_item_size(), input_length=1,
                           name="item_embed", weights=[item_embed_data], trainable=True)

    word_embed_ = word_embed(word_input)
    item_pos_embed_ = item_embed(item_pos_input)
    item_neg_embed_ = item_embed(item_neg_input)

    word_flatten = Flatten()
    word_embed_ = word_flatten(word_embed_)
    item_pos_embed_ = Flatten()(item_pos_embed_)
    item_neg_embed_ = Flatten()(item_neg_embed_)

    pos_layer_ = Dot(axes=-1, normalize=False, name="pos_layer")([word_embed_, item_pos_embed_])
    neg_layer_ = Dot(axes=-1, normalize=False, name="neg_layer")([word_embed_, item_neg_embed_])
    merge_layer_ = Concatenate(axis=-1, name="merge_layer")([pos_layer_, neg_layer_])

    model = Model(input=[word_input, item_pos_input, item_neg_input],
                  output=[merge_layer_, pos_layer_])

    def ranking_loss(y_true, y_pred):
        pos = y_pred[:, 0]
        neg = y_pred[:, 1]
        loss = K.maximum(0.5 + neg - pos, 0.0)
        return K.mean(loss) + 0 * y_true

    def dummy_loss(y_true, y_pred):
        # loss = K.max(y_pred) + 0 * y_true
        loss = y_pred + 0 * y_true
        return loss

    # model = make_parallel(model, int(os.environ['MKL_NUM_THREADS']))
    model.compile(optimizer=Adam(lr=0.01),
                  loss={'merge_layer': ranking_loss, 'pos_layer': dummy_loss},
                  loss_weights=[1, 0])
    print("finish model compiling")
    print(model.summary())
    return model, item_embed, word_embed
def get_cnn_model1(self):
    get_diag = Lambda(lambda xin: K.sum(xin * T.eye(self.max_opt_count), axis=2),
                      output_shape=(self.max_opt_count,))
    transp_out = Lambda(lambda xin: K.permute_dimensions(xin, (0, 2, 1)),
                        output_shape=(self.max_opt_count, self.word_vec_size))
    apply_weights = Lambda(lambda xin: (K.expand_dims(xin[0], axis=-1) * K.expand_dims(xin[1], axis=2)).sum(axis=1),
                           output_shape=(self.word_vec_size, self.max_opt_count))
    tile_q = Lambda(lambda xin: K.tile(xin, (1, self.max_opt_count, 1, 1)),
                    output_shape=(self.max_opt_count, self.max_q_length, self.word_vec_size))
    exp_dims = Lambda(lambda xin: K.expand_dims(xin, 1),
                      output_shape=(1, self.max_q_length, self.word_vec_size))
    exp_dims2 = Lambda(lambda xin: K.expand_dims(xin, 3),
                       output_shape=(None, self.word_vec_size, 1))
    exp_layer = Lambda(lambda xin: K.exp(xin),
                       output_shape=(self.max_sent_para, self.max_opt_count))
    final_weights = Lambda(lambda xin: xin / K.cast(K.sum(xin, axis=1, keepdims=True), K.floatx()),
                           output_shape=(self.max_sent_para, self.max_opt_count))
    mask_weights = Lambda(lambda xin: T.switch(T.eq(xin, 0), np.NINF, xin),
                          output_shape=(self.max_sent_para, self.max_opt_count))
    glob_pool = Lambda(lambda xin: K.mean(xin, axis=[1, 2]), output_shape=(100,))

    filter_sizes = [3, 4, 5]
    num_filters = 100

    q_input = Input(shape=(self.max_q_length, self.word_vec_size), name='question_input')
    q_exp = exp_dims(q_input)
    q_rep = tile_q(q_exp)
    option_input = Input(shape=(self.max_opt_count, self.max_option_length, self.word_vec_size),
                         name='option_input')
    opt_q = Concatenate(axis=2)([q_rep, option_input])

    cnn_input = Input(shape=(None, self.word_vec_size), name='cnn_input')
    cnn_reshape = exp_dims2(cnn_input)
    conv_0 = Conv2D(num_filters, kernel_size=(filter_sizes[0], self.word_vec_size),
                    padding='valid', kernel_initializer='normal', activation='linear')(cnn_reshape)
    conv_1 = Conv2D(num_filters, kernel_size=(filter_sizes[1], self.word_vec_size),
                    padding='valid', kernel_initializer='normal', activation='linear')(cnn_reshape)
    conv_2 = Conv2D(num_filters, kernel_size=(filter_sizes[2], self.word_vec_size),
                    padding='valid', kernel_initializer='normal', activation='linear')(cnn_reshape)
    meanpool_0 = glob_pool(conv_0)
    meanpool_1 = glob_pool(conv_1)
    meanpool_2 = glob_pool(conv_2)
    concatenated_tensor = Concatenate(axis=1)([meanpool_0, meanpool_1, meanpool_2])
    cnn_model = Model(inputs=cnn_input, outputs=concatenated_tensor)

    cnn_td_opt = TimeDistributed(cnn_model)(opt_q)
    doc_input = Input(shape=(self.max_sent_para, self.max_words_sent, self.word_vec_size), name='doc_input')
    cnn_doc = TimeDistributed(cnn_model)(doc_input)

    att_wts = Dot(axes=2, normalize=True)([cnn_doc, cnn_td_opt])
    att_wts = mask_weights(att_wts)
    att_wts = exp_layer(att_wts)
    att_wts = final_weights(att_wts)

    out = apply_weights([cnn_doc, att_wts])
    out = transp_out(out)
    dp = Dot(axes=2, normalize=True)([out, cnn_td_opt])
    out = get_diag(dp)
    probs = MaskedSoftmax()([out, option_input])

    main_model = Model(inputs=[q_input, doc_input, option_input], outputs=probs)
    sgd = SGD(lr=0.1, decay=0., momentum=0., nesterov=False)
    main_model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    main_model.summary()
    return main_model
def createSimilarityBranch(embedding_size, mode='innerprod', negative_size=0, graph='A', scale_negative=False):
    """ Branch of global network: computes similarity between embeddings of given two nodes in a graph """
    inputT = Input(shape=(1, embedding_size,), name='Embedding_Target')
    inputC = Input(shape=(1, embedding_size,), name='Embedding_Context')
    inputs = [inputT, inputC]
    layer_name = 'Output_Similarity'

    if mode == 'l2':
        # l2 distance
        similarity = Lambda(l2_dist, output_shape=l2_output_shape_sim, name=layer_name)(inputs)
    elif mode == 'cossim':
        # cosine similarity
        similarity = Dot(axes=-1, normalize=True, name=layer_name)(inputs)
    elif mode == 'innerprod':
        # inner product
        similarity = Dot(axes=-1, name=layer_name)(inputs)
    else:
        # softmax (default)
        inputNS = Input(shape=(negative_size, embedding_size,), name='Embedding_NS')
        inputs.append(inputNS)
        similarityTC = Dot(axes=-1)([inputT, inputC])
        similarityCNS = Dot(axes=-1)([inputC, inputNS])
        similarity = Concatenate(axis=-1)([similarityTC, similarityCNS])
        similarity = Activation('softmax', name=layer_name)(similarity)

        # normalize negative samples loss
        if scale_negative:
            def normalizeNS(x):
                # from keras import backend as K
                # x = K.eval(x)
                # for i in range(1, len(x)):
                #     x[i] = x[i] / negative_size
                # return K.variable(x)
                return x / negative_size
            similarity = Activation(normalizeNS)(similarity)

    return Model(inputs, similarity, name='Branch_Similarity' + graph)
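# A hypothetical usage sketch (not from the original source): build the cosine-similarity
# branch and score a few toy target/context embedding pairs; the sizes and random data
# below are made up for illustration only.
import numpy as np

branch = createSimilarityBranch(embedding_size=16, mode='cossim', graph='A')
target = np.random.randn(4, 1, 16).astype('float32')    # (batch, 1, embedding_size)
context = np.random.randn(4, 1, 16).astype('float32')
scores = branch.predict([target, context])               # one cosine similarity per pair
print(scores.shape)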
def make_word2vec_model(embedding_dim, num_words):
    '''
    embedding_dim: (int) embedding dimension
    num_words: (int) size of the vocabulary
    '''
    word_input = Input(shape=(1,), dtype='int32')
    context_input = Input(shape=(1,), dtype='int32')
    print('*' * 10)
    print('input layer: ', word_input)

    word_embedding = Embedding(num_words, embedding_dim)
    we = word_embedding(word_input)
    print('word embedding layer: ', we)

    context_embedding = Embedding(num_words, embedding_dim)
    ce = Reshape((embedding_dim, 1))(context_embedding(context_input))
    print('context embedding layer: ', ce)

    dots = Dot((1, 2))([ce, we])
    print('merge layer: ', dots)

    flat = Flatten()(dots)
    print('flatten layer: ', flat)
    print('*' * 10)

    acts = Activation('sigmoid')(flat)

    model = Model(inputs=[word_input, context_input], outputs=acts)
    model.compile('adam', loss='binary_crossentropy')
    return model
def build_model(preprocessor):
    inputs = Input(shape=(X_max_len,))

    # encoder network
    x = Embedding(X_vocab_size, 200, input_length=X_max_len, mask_zero=False,
                  weights=preprocessor.init_vectors)(inputs)
    hidden_state_encoder = Bidirectional(LSTM(hidden_size, return_sequences=True),
                                         merge_mode='concat')(x)
    # hidden_state_encoder = Bidirectional(LSTM(hidden_size), merge_mode='concat')(hidden_state_encoder)
    e = Flatten()(hidden_state_encoder)
    e = Dropout(0.3)(e)
    g = Dense(max_seq_len * X_max_len)(e)
    g = Dropout(0.3)(g)
    g = Dense(max_seq_len * X_max_len,
              kernel_regularizer=regularizers.l2(0.01),
              activity_regularizer=regularizers.l2(0.000001))(g)
    g = Reshape((X_max_len, max_seq_len))(g)
    distribution_over_input = Activation(softMaxAxis1, name='attention_wts')(g)
    encoder_output_after_attention = Dot(axes=(1, 1))([distribution_over_input, hidden_state_encoder])

    # decoder network
    for _ in range(num_layers):
        x = LSTM(hidden_size, return_sequences=True)(encoder_output_after_attention)
    hidden_state_output = TimeDistributed(Dense(100))(x)
    hidden_state_output = TimeDistributed(Dropout(0.3))(hidden_state_output)
    hidden_state_output = TimeDistributed(Dense(y_vocab_size + 1))(hidden_state_output)
    y = Activation('softmax')(hidden_state_output)

    model = Model(input=[inputs], output=[y])
    rmsprop = RMSprop(lr=0.0001, rho=0.9, epsilon=1e-08, decay=0.0)
    model.compile(loss='categorical_crossentropy', optimizer=rmsprop, metrics=['accuracy'])
    return model
def add_attention_layer(query_embedding, doc_embedding, layer_name,
                        query_mask=None, doc_mask=None, mask=False):
    dot_prod = Dot(axes=-1, name=layer_name)([doc_embedding, query_embedding])
    norm_sim = Activation('softmax')(dot_prod)
    if mask:
        max_sim = Lambda(lambda x: max_pooling_with_mask(x[0], x[1]),
                         output_shape=lambda inp_shp: (inp_shp[0][0], inp_shp[0][2],))([norm_sim, query_mask])
        mean_sim = Lambda(lambda x: mean_pooling_with_mask(x[0], x[1], x[2]),
                          output_shape=lambda inp_shp: (inp_shp[0][0], inp_shp[0][2],))([norm_sim, doc_mask, query_mask])
    else:
        max_sim = Lambda(max_pooling,
                         output_shape=lambda inp_shp: (inp_shp[0], inp_shp[2],))(norm_sim)
        mean_sim = Lambda(mean_pooling,
                          output_shape=lambda inp_shp: (inp_shp[0], inp_shp[2],))(norm_sim)
    return norm_sim, max_sim, mean_sim
def __init__(self, episodes=None, existing_model_file=None, bag_of_words=False):
    if existing_model_file and os.path.exists(existing_model_file):
        self.model = load_model(existing_model_file)
        return
    elif episodes is None:
        raise RuntimeError("must provide either existing model or input data")

    max_token_length = len(episodes[0].get_tokens())
    scene_feat_length = len(episodes[0].get_referent())
    max_token_id = int(max([x for e in episodes for x in e.get_tokens()]))
    self.bag_of_words = bag_of_words

    if bag_of_words:
        tokens_input = Input(shape=(max_token_id,), name="utterance_input", dtype='float32')
        tokens = Dropout(0.5)(tokens_input)
        prediction = Dense(scene_feat_length, name="hidden_layer", kernel_regularizer=l1(0.001))(tokens)
    else:
        tokens_input = Input(shape=(max_token_length,), name="utterance_input", dtype='int32')
        embeddings = Embedding(output_dim=scene_feat_length, input_dim=(max_token_id + 1),
                               name="embedding_layer")(tokens_input)
        prediction = GRU(scene_feat_length, activation=None, name="recurrent_layer",
                         dropout=0.2, recurrent_dropout=0.2, kernel_regularizer=l1(0.001))(embeddings)

    scene_input = Input(shape=(scene_feat_length,), name="scene_input", dtype="float32")
    dotproduct = Dot(axes=1)([prediction, scene_input])
    output = Activation("sigmoid")(dotproduct)

    rmsprop = RMSprop(lr=0.01, rho=0.9, epsilon=1e-08, decay=0.001)
    model = Model(inputs=[tokens_input, scene_input], outputs=output)
    model.compile(optimizer=rmsprop, loss='binary_crossentropy', metrics=['accuracy'])
    self.model = model
def build(pfp_len=2048, rxnfp_len=2048, l2v=0.01):
    input_pfp = Input(shape=(pfp_len,))
    input_rxnfp = Input(shape=(rxnfp_len,))

    input_pfp_h1 = Dense(1024, activation='elu')(input_pfp)
    input_pfp_h2 = Dropout(0.3)(input_pfp_h1)
    input_pfp_h3 = Highway_self(activation='elu')(input_pfp_h2)
    input_pfp_h4 = Highway_self(activation='elu')(input_pfp_h3)
    input_pfp_h5 = Highway_self(activation='elu')(input_pfp_h4)
    input_pfp_h6 = Highway_self(activation='elu')(input_pfp_h5)
    input_pfp_h7 = Highway_self(activation='elu')(input_pfp_h6)
    input_rxnfp_h1 = Dense(1024, activation='elu')(input_rxnfp)

    merged_h1 = Dot(axes=1, normalize=False)([input_pfp_h7, input_rxnfp_h1])
    output = Dense(1, activation='sigmoid')(merged_h1)

    model = Model([input_pfp, input_rxnfp], output)
    model.count_params()
    model.summary()

    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    # pass the configured Adam instance rather than the default 'adam' string,
    # otherwise the settings above are silently ignored
    model.compile(loss='binary_crossentropy',
                  optimizer=adam,
                  metrics=['acc',
                           keras.metrics.Precision(name='precision'),
                           keras.metrics.Recall(name='recall')])
    return model
def build_model(preprocessor):
    inputs = Input(shape=(X_max_len,))

    # encoder network
    x = Embedding(X_vocab_size, 200, input_length=X_max_len, mask_zero=False,
                  weights=preprocessor.init_vectors)(inputs)
    hidden_state_encoder = LSTM(hidden_size, return_sequences=True)(x)
    e = Flatten()(hidden_state_encoder)
    g = Dense(max_seq_len * X_max_len)(e)
    g = Reshape((X_max_len, max_seq_len))(g)
    distribution_over_input = Activation(softMaxAxis1, name='attention_wts')(g)
    encoder_output_after_attention = Dot(axes=(1, 1))([distribution_over_input, hidden_state_encoder])

    # decoder network
    for _ in range(num_layers):
        x = LSTM(hidden_size, return_sequences=True)(encoder_output_after_attention)
    hidden_state_output = TimeDistributed(Dense(y_vocab_size + 1))(x)
    y = Activation('softmax')(hidden_state_output)

    model = Model(input=[inputs], output=[y])
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return model
def _merge_cosim_layer(self, model_1, model_2):
    '''
    Computes the cosine similarity between two tensors.

    :param model_1: The first tensor.
    :param model_2: The second tensor.
    :return: The cosine similarity value between model_1 and model_2.
    '''
    return Dot(axes=1, normalize=True)([model_1, model_2])
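# A minimal sketch (not from the original code) checking that Dot(axes=1, normalize=True)
# matches a NumPy cosine similarity; it assumes a TensorFlow 2.x Keras backend running eagerly.
import numpy as np
from tensorflow.keras.layers import Dot

a = np.random.randn(4, 8).astype('float32')   # batch of 4 vectors
b = np.random.randn(4, 8).astype('float32')

keras_cos = Dot(axes=1, normalize=True)([a, b]).numpy().ravel()
numpy_cos = (a * b).sum(axis=1) / (np.linalg.norm(a, axis=1) * np.linalg.norm(b, axis=1))
print(np.allclose(keras_cos, numpy_cos, atol=1e-5))  # expected: True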
def train(self, walks, dataset, size, window, negative, pair_ratio, epochs, lambda0):
    vocab_size = len(self.G.nodes())
    k = len(self.G.nums_type)

    # pairwise model
    input_target = Input((1,))
    embedding_target = Embedding(vocab_size, size, name="emb_0")
    target = embedding_target(input_target)
    input_context = Input((1,))
    context = Embedding(vocab_size, size)(input_context)
    dot_product = Dot(axes=2)([target, context])
    dot_product = Reshape((1,))(dot_product)
    output = Activation('sigmoid')(dot_product)
    model_p = Model(input=[input_target, input_context], output=output)
    model_p.summary()
    model_p.compile(loss='binary_crossentropy', optimizer='rmsprop', loss_weights=[1])

    # tuplewise model
    input_tuplew = Input(shape=(k,), name='input', dtype='int32')
    tuplew = embedding_target(input_tuplew)
    conv = Conv1D(32, 3, activation='relu', name='conv')(tuplew)
    pooling = GlobalMaxPooling1D(name='pooling')(conv)
    output_tuplew = Dense(1, activation='sigmoid')(pooling)
    model_t = Model(input=input_tuplew, output=output_tuplew)
    model_t.summary()
    model_t.compile(loss='binary_crossentropy', optimizer='rmsprop', loss_weights=[lambda0])

    for epoch in range(epochs):
        random.shuffle(walks)
        loss_p = 0.
        loss_t = 0.
        for walk in tqdm(walks, ascii=True):
            pairs, labels_p = skipgrams(walk, vocab_size, negative_samples=negative, window_size=window)
            tuples, labels_t = self._tuple_sample(walk, negative_samples=negative, pair_ratio=pair_ratio)
            if pairs:
                x_pair = [np.array(x) for x in zip(*pairs)]
                y_pair = np.array(labels_p, dtype=np.int32)
                loss_p += model_p.train_on_batch(x_pair, y_pair)
            if tuples:
                x_tuple = np.asarray(tuples)
                y_tuple = np.array(labels_t, dtype=np.int32)
                loss_t += model_t.train_on_batch(x_tuple, y_tuple)
        print("epoch:", epoch + 1)
        print("loss:", loss_p / len(walks), loss_t / len(walks), (loss_p + lambda0 * loss_t) / len(walks))

    self.wv = model_p.get_layer('emb_0').get_weights()[0]
    self.model_t = model_t
def create_deepconn_dp(self):
    '''
    simple dot product instead of factorization machine for final layer.
    this simplification yielded similar results in the paper and should work
    better on small data due to less overfitting.
    '''
    dotproduct = Dot(axes=1)([self.towerU, self.towerI])
    output = Add()([self.outNeuron, dotproduct])
    model = Model(inputs=[self.inputU, self.inputI], outputs=[output])
    model.compile(optimizer='adam', loss='mse')
    return model
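# A toy NumPy sketch (not part of the original class) contrasting the plain dot product used
# above with a factorization-machine-style pairwise term; the vectors u and v are made-up
# stand-ins for self.towerU / self.towerI outputs on a single example.
import numpy as np

u = np.array([0.2, -0.5, 1.0])   # hypothetical user-tower output
v = np.array([0.7, 0.1, -0.3])   # hypothetical item-tower output

dot_score = np.dot(u, v)         # what Dot(axes=1) contributes per example

# An FM second-order term over the concatenated features x = [u, v] with factor matrix V
# additionally models interactions *within* each tower, not just across them.
x = np.concatenate([u, v])
V = np.random.randn(x.shape[0], 4) * 0.1
fm_pairwise = 0.5 * np.sum((x @ V) ** 2 - (x ** 2) @ (V ** 2))

print(dot_score, fm_pairwise)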
def generate_embeddedModel(self):
    userModel = self.userEmbeddingModel()
    movieModel = self.movieEmbeddingModel()
    x = Dot(axes=-1)([userModel.output, movieModel.output])
    model = Model([userModel.input, movieModel.input], x)
    print('ID : ', id(self))
    print(model.summary())
    self.deep_model = model
    return model
def get_gru_baseline(self):
    lstm_qo = GRU(100, return_sequences=False)
    get_diag = Lambda(lambda xin: K.sum(xin * T.eye(self.max_opt_count), axis=2),
                      output_shape=(self.max_opt_count,))
    transp_out = Lambda(lambda xin: K.permute_dimensions(xin, (0, 2, 1)),
                        output_shape=(self.max_opt_count, 100))
    apply_weights = Lambda(lambda xin: (K.expand_dims(xin[0], axis=-1) * K.expand_dims(xin[1], axis=2)).sum(axis=1),
                           output_shape=(100, self.max_opt_count))
    tile_q = Lambda(lambda xin: K.tile(xin, (1, self.max_opt_count, 1, 1)),
                    output_shape=(self.max_opt_count, self.max_q_length, self.word_vec_size))
    exp_dims = Lambda(lambda xin: K.expand_dims(xin, 1),
                      output_shape=(1, self.max_q_length, self.word_vec_size))
    exp_layer = Lambda(lambda xin: K.exp(xin),
                       output_shape=(self.max_sent_para, self.max_opt_count))
    mask_weights = Lambda(lambda xin: T.switch(T.eq(xin, 0), np.NINF, xin),
                          output_shape=(self.max_sent_para, self.max_opt_count))
    final_weights = Lambda(lambda xin: xin / K.cast(K.sum(xin, axis=1, keepdims=True), K.floatx()),
                           output_shape=(self.max_sent_para, self.max_opt_count))

    q_input = Input(shape=(self.max_q_length, self.word_vec_size), name='question_input')
    q_exp = exp_dims(q_input)
    q_rep = tile_q(q_exp)
    option_input = Input(shape=(self.max_opt_count, self.max_option_length, self.word_vec_size),
                         name='option_input')
    opt_q = Concatenate(axis=2)([q_rep, option_input])

    lstm_input = Input(shape=(None, self.word_vec_size), name='lstm_input')
    lstm_mask = Masking(mask_value=0.)(lstm_input)
    lstm_out = lstm_qo(lstm_mask)
    lstm_model = Model(inputs=lstm_input, outputs=lstm_out)

    lstm_td_opt = TimeDistributed(lstm_model)(opt_q)
    doc_input = Input(shape=(self.max_sent_para, self.max_words_sent, self.word_vec_size), name='doc_input')
    lstm_doc = TimeDistributed(lstm_model)(doc_input)

    att_wts = Dot(axes=2, normalize=True)([lstm_doc, lstm_td_opt])
    att_wts = mask_weights(att_wts)
    att_wts = exp_layer(att_wts)
    att_wts = final_weights(att_wts)

    out = apply_weights([lstm_doc, att_wts])
    out = transp_out(out)
    dp = Dot(axes=2, normalize=True)([out, lstm_td_opt])
    out = get_diag(dp)
    probs = MaskedSoftmax()([out, option_input])

    main_model = Model(inputs=[q_input, doc_input, option_input], outputs=probs)
    sgd = SGD(lr=0.1, decay=0., momentum=0., nesterov=False)
    main_model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    main_model.summary()
    return main_model
def conv_network(x1_shape, input_dims, embedding_matrix):
    input_a = Input(shape=x1_shape[1:])
    input_b = Input(shape=x1_shape[1:])

    def create_base_network(x1_shape, input_dims, embedding_matrix):
        inp = Input(x1_shape[1:])
        embedding_block = Embedding(input_dim=input_dims, output_dim=300,
                                    weights=[embedding_matrix], input_length=40,
                                    trainable=False)(inp)
        embedding_block = Convolution1D(64, 5)(embedding_block)
        embedding_block = PReLU()(embedding_block)
        embedding_block = Dropout(0.2)(embedding_block)
        embedding_block = Convolution1D(64, 5)(embedding_block)
        embedding_block = PReLU()(embedding_block)
        embedding_block = GlobalMaxPooling1D()(embedding_block)
        embedding_block = BatchNormalization()(embedding_block)
        embedding_block = Model(inputs=inp, outputs=embedding_block)
        return embedding_block

    base_network = create_base_network(x1_shape, input_dims, embedding_matrix)
    processed_a = base_network(input_a)  # shared
    processed_b = base_network(input_b)  # layers

    distance1 = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([processed_a, processed_b])
    distance2 = Dot(axes=1, normalize=True)([processed_a, processed_b])

    merged = Concatenate()([processed_a, processed_b, distance2])
    merged = BatchNormalization()(merged)
    merged = Dense(64)(merged)
    merged = PReLU()(merged)
    merged = Dropout(0.1)(merged)
    merged = BatchNormalization()(merged)
    merged = Dense(64)(merged)
    merged = PReLU()(merged)
    merged = BatchNormalization()(merged)
    merged = Dense(1, activation="sigmoid")(merged)

    model = Model(inputs=[input_a, input_b], outputs=merged)
    model.compile(loss="binary_crossentropy", optimizer=Adam(1e-3), metrics=["accuracy"])
    return model
def gater(self):
    dim_inputs_data = Input(shape=(self.train_dim[1],))
    dim_svm_yhat = Input(shape=(self.experts,))
    layer_1 = Dense(self.hidden_units, activation='sigmoid')(dim_inputs_data)
    layer_2 = Dense(self.experts, name='layer_op_2', activation='sigmoid', use_bias=False)(layer_1)
    layer_3 = Dot(1)([layer_2, dim_svm_yhat])
    out_layer = Dense(1, activation='tanh')(layer_3)
    model = Model(input=[dim_inputs_data, dim_svm_yhat], output=out_layer)
    adam = optimizers.Adam(lr=0.01)
    model.compile(loss='mse', optimizer=adam, metrics=['accuracy'])
    return model
def createDistanceBranch(embedding_size, mode='l2'):
    """ Branch of global network: computes all pairwise distances between node embeddings of different graphs """
    input1 = Input(shape=(embedding_size,), name='Input_EmbeddingA')
    input2 = Input(shape=(embedding_size,), name='Input_EmbeddingB')
    layer_name = 'Output_DistanceAB'

    if mode == 'innerprod':
        # inner product
        distance12 = Dot(axes=-1, name=layer_name)([input1, input2])
    elif mode == 'cossim':
        # cosine similarity
        distance12 = Dot(axes=-1, normalize=True, name=layer_name)([input1, input2])
    else:
        # euclidean distance (default)
        # def l2_graphdist(vects):
        #     x, y = vects
        #     sum_square = K.sum(K.square(x - y), axis=1, keepdims=True)
        #     return K.sigmoid(K.sqrt(K.maximum(sum_square, K.epsilon()))) - .5
        # distance12 = Lambda(l2_graphdist, output_shape=l2_output_shape, name=layer_name)([input1, input2])
        distance12 = Lambda(l2_dist, output_shape=l2_output_shape_dist, name=layer_name)([input1, input2])

    return Model([input1, input2], distance12, name='Branch_Distance')
def add_attention_layer_with_query_weighting(query_embedding, doc_embedding, layer_name, attention_level,
                                             query_weight, query_mask=None, doc_mask=None, mask=False):
    """ Dot -> softmax -> pooling -> (mask) -> weighting """
    dot_prod = Dot(axes=-1, name=layer_name)([doc_embedding, query_embedding])
    norm_sim = Activation('softmax')(dot_prod)
    if mask:
        max_sim = Lambda(lambda x: max_pooling_with_mask(x[0], x[1]),
                         output_shape=lambda inp_shp: (inp_shp[0][0], inp_shp[0][2],))([norm_sim, query_mask])
        mean_sim = Lambda(lambda x: mean_pooling_with_mask(x[0], x[1], x[2]),
                          output_shape=lambda inp_shp: (inp_shp[0][0], inp_shp[0][2],))([norm_sim, doc_mask, query_mask])
    else:
        max_sim = Lambda(max_pooling,
                         output_shape=lambda inp_shp: (inp_shp[0], inp_shp[2],))(norm_sim)
        mean_sim = Lambda(mean_pooling,
                          output_shape=lambda inp_shp: (inp_shp[0], inp_shp[2],))(norm_sim)

    if attention_level <= 1:
        setattr(K, 'params', {'attention_level': attention_level})
        max_sim = Lambda(lambda x: elementwise_prod(x[0], x[1]),
                         output_shape=lambda inp_shp: (inp_shp[0][0], inp_shp[0][1],))([max_sim, query_weight])
        mean_sim = Lambda(lambda x: elementwise_prod(x[0], x[1]),
                          output_shape=lambda inp_shp: (inp_shp[0][0], inp_shp[0][1]))([mean_sim, query_weight])
    return norm_sim, max_sim, mean_sim
def __init__(self, n_users, n_items, config=None):
    super().__init__(n_users, n_items, config)

    self.implicit = np.zeros((self.n_items, self.n_users,))

    # Defaults
    default = {'n_factors': 40, 'reg_bias': 0.00005, 'reg_latent': 0.00003,
               'implicit_thresh': 4.0, 'implicit_thresh_crosstrain': 4.75}
    default.update(self.config)
    self.config = default

    n_factors = self.config['n_factors']
    reg_bias = l2(self.config['reg_bias'])
    reg_latent = l2(self.config['reg_latent'])

    self.implicit_thresh = self.config.get('implicit_thresh', 4.0)
    self.implicit_thresh_crosstrain = self.config.get('implicit_thresh_crosstrain', 4.75)

    input_u = Input((1,))
    input_i = Input((1,))

    vec_u = Embedding(self.n_users, n_factors, input_length=1,
                      embeddings_regularizer=reg_latent)(input_u)
    vec_u_r = Flatten()(vec_u)

    vec_implicit = Embedding(self.n_items, self.n_users, input_length=1, trainable=False,
                             name='implicit')(input_i)
    implicit_factors = Dense(n_factors, kernel_initializer='normal', activation='linear',
                             kernel_regularizer=reg_latent)(vec_implicit)
    implicit_factors = Flatten()(implicit_factors)

    mf = Dot(1)([implicit_factors, vec_u_r])

    bias_u = Embedding(self.n_users, 1, input_length=1, embeddings_initializer='zeros',
                       embeddings_regularizer=reg_bias)(input_u)
    bias_u_r = Flatten()(bias_u)
    bias_i = Embedding(self.n_items, 1, input_length=1, embeddings_initializer='zeros',
                       embeddings_regularizer=reg_bias)(input_i)
    bias_i_r = Flatten()(bias_i)

    added = Concatenate()([bias_u_r, bias_i_r, mf])
    mf_out = BiasLayer(bias_initializer=bias_init, name='bias', activation='sigmoid')(added)

    self.model = Model(inputs=[input_u, input_i], outputs=mf_out)
    self.compile()
def declare_model(vocabulary_size, embedding_dim):
    word_input = Input(shape=(1,))
    w_emb = Embedding(vocabulary_size, embedding_dim)(word_input)
    context_input = Input(shape=(1,))
    c_emb = Embedding(vocabulary_size, embedding_dim)(context_input)
    dist = Dot(axes=2)([w_emb, c_emb])
    dist = Reshape((1,), input_shape=(1, 1))(dist)
    o = Activation('sigmoid')(dist)
    model = Model(inputs=[word_input, context_input], outputs=o)
    return model
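# A minimal, hypothetical usage sketch for declare_model (not from the original source):
# it trains on skip-gram pairs produced by keras.preprocessing.sequence.skipgrams with
# negative sampling; the vocabulary size, embedding size, and token sequence are made up.
import numpy as np
from keras.preprocessing.sequence import skipgrams

model = declare_model(vocabulary_size=5000, embedding_dim=100)
model.compile(optimizer='adam', loss='binary_crossentropy')

token_ids = np.random.randint(1, 5000, size=1000).tolist()  # placeholder corpus
couples, labels = skipgrams(token_ids, vocabulary_size=5000, window_size=4, negative_samples=1.0)
word_target, word_context = [np.array(x, dtype='int32').reshape(-1, 1) for x in zip(*couples)]

loss = model.train_on_batch([word_target, word_context], np.array(labels))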
def add_context2query_layer(query_embedding, biattention_matrix, attention_level, max_query_len, max_doc_len):
    # Following the context-to-query implementation of the BiDAF model
    # query_embedding: batch_size * max_query_len * nb_filters
    # biattention_matrix: batch_size * max_query_len * max_doc_len
    norm_biattention = Softmax(axis=-2)(biattention_matrix)  # Activation('softmax', axis=-2)(biattention_matrix)
    reshape_norm_biatt = Reshape((max_doc_len, max_query_len,))(norm_biattention)
    context_embedding = Dot(axes=[-1, -2], name="context2query-%d" % attention_level)(
        [reshape_norm_biatt, query_embedding])
    return context_embedding
def lstm_network(x1_shape, input_dims, embedding_matrix):
    input_a = Input(shape=x1_shape[1:])
    input_b = Input(shape=x1_shape[1:])

    def create_base_network(x1_shape, input_dims, embedding_matrix):
        inp = Input(x1_shape[1:])
        embedding_block = Embedding(input_dim=input_dims, output_dim=300,
                                    weights=[embedding_matrix], input_length=40,
                                    trainable=False)(inp)
        # embedding_block = LSTM(32, return_sequences=True)(embedding_block)
        # embedding_block = BatchNormalization()(embedding_block)
        embedding_block = LSTM(32, return_sequences=True)(embedding_block)
        embedding_block = Lambda(lambda x: K.sum(x, axis=1))(embedding_block)
        embedding_block = Model(inputs=inp, outputs=embedding_block)
        return embedding_block

    base_network = create_base_network(x1_shape, input_dims, embedding_matrix)
    processed_a = base_network(input_a)  # shared
    processed_b = base_network(input_b)  # layers

    distance1 = Lambda(lambda x: K.mean(K.abs(x[1] - x[0]), axis=1, keepdims=True))([processed_a, processed_b])
    distance1 = BatchNormalization()(distance1)
    distance2 = Dot(axes=1, normalize=True)([processed_a, processed_b])

    merged = Concatenate()([processed_a, processed_b, distance2])
    merged = Dense(32)(merged)
    merged = PReLU()(merged)
    merged = BatchNormalization()(merged)
    merged = Dense(32)(merged)
    merged = PReLU()(merged)
    merged = BatchNormalization()(merged)
    merged = Dense(1, activation="sigmoid")(merged)

    model = Model(inputs=[input_a, input_b], outputs=merged)
    model.compile(loss="binary_crossentropy", optimizer=Adam(1e-3), metrics=["accuracy"])
    return model
def create_factorization_model(user_size, item_size, hidden_size, **kwargs):
    '''
    Basic generalization of matrix factorization models.

    user_size and item_size are the number of users and items, respectively;
    hidden_size determines the number of attributes.

    Optional keyword arguments:
        regularization: amount of L2 regularization to apply, none by default
        activations: vector of activations for user and item hidden layers.
            Default is "linear"; use "relu" for non-negative matrix factorization.
        more_complex: learns additional weights for each attribute instead of
            taking a simple dot product. False by default.
        useIntercepts: whether to use user and item intercepts. Common practice,
            but False by default here.
        squash: if the output is bounded, it can help to tell that to the model,
            but then the output needs to be normalized to fall between 0 and 1.
            False by default.
    '''
    inputU = Input(shape=(user_size,), name="user_1hot")
    inputI = Input(shape=(item_size,), name="item_1hot")

    regularization = kwargs.get('regularization')
    regularizer = regularizers.l2(regularization) if regularization else None

    if hidden_size:
        activations = kwargs.get('activations') or ["linear", "linear"]
        print(activations)
        hiddenU = Dense(hidden_size, activation=activations[0], name="user_hidden",
                        kernel_regularizer=regularizer, use_bias=True)(inputU)
        hiddenI = Dense(hidden_size, activation=activations[1], name="item_hidden",
                        kernel_regularizer=regularizer, use_bias=True)(inputI)
        output = (Dense(1, kernel_regularizer=to_one(regularization or .01))(
                      Multiply(name="aspect_points")([hiddenU, hiddenI]))
                  if kwargs.get('more_complex')
                  else Dot(axes=1)([hiddenU, hiddenI]))
        if kwargs.get('useIntercepts'):
            intercept = Dense(1, use_bias=True, kernel_regularizer=regularizer)(
                Concatenate()([inputU, inputI]))
            output = Add(name="prediction")([output, intercept])
    else:
        # same as user_item model
        output = Dense(1, name="prediction", use_bias=True,
                       kernel_regularizer=regularizer)(Concatenate()([inputU, inputI]))

    if kwargs.get('squash'):
        output = Dense(1)(Dense(1, activation="sigmoid")(output))

    model = Model(inputs=[inputU, inputI], outputs=[output])
    model.compile(optimizer='adam', loss='mse')
    return model
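# A hypothetical usage sketch (not from the original source): one-hot encoded users and
# items with random ratings, using illustrative sizes, just to show the expected input format.
import numpy as np

n_users, n_items = 200, 100
model = create_factorization_model(n_users, n_items, hidden_size=8,
                                   regularization=0.001, useIntercepts=True)

u_idx = np.random.randint(0, n_users, size=512)
i_idx = np.random.randint(0, n_items, size=512)
ratings = np.random.uniform(1.0, 5.0, size=512)

U_onehot = np.eye(n_users, dtype='float32')[u_idx]   # shape (512, n_users)
I_onehot = np.eye(n_items, dtype='float32')[i_idx]   # shape (512, n_items)

model.fit([U_onehot, I_onehot], ratings, epochs=2, batch_size=64)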
def MFModel(users_num, movies_num, latent_dim=666):
    user_input = Input(shape=[1])
    item_input = Input(shape=[1])

    user_vec = Embedding(users_num + 1, latent_dim)(user_input)
    user_vec = Flatten()(user_vec)
    item_vec = Embedding(movies_num + 1, latent_dim)(item_input)  # item table sized by movies_num, not users_num
    item_vec = Flatten()(item_vec)

    user_bias = Embedding(users_num + 1, 1, embeddings_initializer="zeros")(user_input)
    user_bias = Flatten()(user_bias)
    item_bias = Embedding(movies_num + 1, 1, embeddings_initializer="zeros")(item_input)
    item_bias = Flatten()(item_bias)

    # predicted rating: dot product of the latent vectors plus both bias terms
    r_hat = Dot(axes=1)([user_vec, item_vec])
    r_hat = Add()([r_hat, user_bias, item_bias])

    model = keras.models.Model([user_input, item_input], r_hat)
    return model
def alignments2vec(x, y, V, tokenizer):
    # inputs
    w_inputs = Input(shape=(1,), dtype='int32')
    w = Embedding(V, vec_length)(w_inputs)

    # context
    c_inputs = Input(shape=(1,), dtype='int32')
    c = Embedding(V, vec_length)(c_inputs)

    o = Dot(axes=2)([w, c])
    o = Reshape((1,), input_shape=(1, 1))(o)
    o = Activation('sigmoid')(o)

    SkipGram = Model(inputs=[w_inputs, c_inputs], outputs=o)
    SkipGram.summary()
    SkipGram.compile(loss='binary_crossentropy', optimizer='adam')

    history = SkipGram.fit_generator(
        generate_vec_batch(x, y, batch_size, tokenizer, SkipGram),
        steps_per_epoch=steps_per_epoch,
        epochs=300,  # len(x_train)//batch_size//steps_per_epoch,
        validation_data=generate_vec_batch(x, y, batch_size, tokenizer, SkipGram),
        validation_steps=steps_per_epoch)
    print(history.history.keys())

    # summarize history for loss
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

    f = open('alignment_vec.txt', 'w')
    f.write('{} {}\n'.format(V - 1, vec_length))
    vectors = SkipGram.get_weights()[0]
    for word, i in tokenizer.word_index.items():
        f.write('{} {}\n'.format(word, ' '.join(map(str, list(vectors[i, :])))))
    f.close()

    w2v = gensim.models.KeyedVectors.load_word2vec_format('./alignment_vec.txt', binary=False)
    print(w2v.most_similar(positive=['a' * word_length]))
def maxpool_cosine_score_model(self, input_dim):
    """Define a model with bi-LSTM layers and without attention."""
    input_a = Input(shape=(input_dim,))
    input_b = Input(shape=(input_dim,))

    if self.type_of_weights == "shared":
        drop_layer = Dropout(self.emb_drop_val)
        drop_a = drop_layer(input_a)
        drop_b = drop_layer(input_b)
        embedding_layer = self.create_embedding_layer(self.max_sequence_length)
        emb_a = embedding_layer(drop_a)
        emb_b = embedding_layer(drop_b)
        lstm_layer = self.create_lstm_layer_max_pooling(self.max_sequence_length)
        lstm_a = lstm_layer(emb_a)
        lstm_b = lstm_layer(emb_b)
    elif self.type_of_weights == "separate":
        drop_layer_a = Dropout(self.emb_drop_val)
        drop_layer_b = Dropout(self.emb_drop_val)
        drop_a = drop_layer_a(input_a)
        drop_b = drop_layer_b(input_b)
        embedding_layer_a = self.create_embedding_layer(self.max_sequence_length)
        embedding_layer_b = self.create_embedding_layer(self.max_sequence_length)
        emb_a = embedding_layer_a(drop_a)
        emb_b = embedding_layer_b(drop_b)
        lstm_layer_a = self.create_lstm_layer_max_pooling(self.max_sequence_length)
        lstm_layer_b = self.create_lstm_layer_max_pooling(self.max_sequence_length)
        lstm_a = lstm_layer_a(emb_a)
        lstm_b = lstm_layer_b(emb_b)

    if self.pooling is None or self.pooling == "max":
        lstm_a = Lambda(self.max_pooling, output_shape=self.max_pooling_output_shape,
                        name="max_pooling_a")(lstm_a)
        lstm_b = Lambda(self.max_pooling, output_shape=self.max_pooling_output_shape,
                        name="max_pooling_b")(lstm_b)

    if self.type_of_weights == "shared":
        dropout = Dropout(self.maxpool_drop_val)
        lstm_a = dropout(lstm_a)
        lstm_b = dropout(lstm_b)
    elif self.type_of_weights == "separate":
        dropout_a = Dropout(self.maxpool_drop_val)
        dropout_b = Dropout(self.maxpool_drop_val)
        lstm_a = dropout_a(lstm_a)
        lstm_b = dropout_b(lstm_b)

    cosine = Dot(normalize=True, axes=-1)([lstm_a, lstm_b])
    model = Model([input_a, input_b], cosine, name="score_model")
    return model
def get_model():
    logging.info("Building the model")
    input1 = Input(shape=(MAXLEN, 21), dtype='float32', name='input1')
    input2 = Input(shape=(MAXLEN, 21), dtype='float32', name='input2')
    feature_model = get_dense_features()
    vector1 = feature_model(input1)
    vector2 = feature_model(input2)
    net = Dot(axes=1)([vector1, vector2])
    net = Activation('sigmoid')(net)
    model = Model(inputs=[input1, input2], outputs=net)
    model.summary()
    logging.info('Compiling the model')
    optimizer = RMSprop()
    model.compile(optimizer=optimizer, loss='binary_crossentropy')
    logging.info('Compilation finished')
    return model
def add_query2context_layer(doc_embedding, biattention_matrix, attention_level, max_query_len, max_doc_len, nb_filters):
    # doc_embedding: batch_size * max_doc_len * nb_filters
    # biattention_matrix: batch_size * max_query_len * max_doc_len
    max_biattention = Lambda(max_pooling,
                             output_shape=lambda inp_shp: (inp_shp[0], inp_shp[2],))(biattention_matrix)
    norm_biatt = Activation('softmax')(max_biattention)
    reshape_doc_embedding = Reshape((nb_filters, max_doc_len,))(doc_embedding)
    context_embedding = Dot(axes=-1, name="query2context-%d" % attention_level)(
        [reshape_doc_embedding, norm_biatt])
    return context_embedding
def SiameseLSTM(max_token_length, hidden_size, embedding_size=300):
    text_input_1 = Input(shape=(max_token_length, embedding_size), name='text_1')
    text_mask_1 = Masking(mask_value=0.0, name='text_mask_1')(text_input_1)
    # text_dropout_1 = Dropout(.5, name='text_dropout_1')(text_mask_1)

    text_input_2 = Input(shape=(max_token_length, embedding_size), name='text_2')
    text_mask_2 = Masking(mask_value=0.0, name='text_mask_2')(text_input_2)
    # text_dropout_2 = Dropout(.5, name='text_dropout_2')(text_mask_2)

    lstm_1_a = Bidirectional(GRU(units=hidden_size, return_sequences=True, name='RNN_1_a'))(text_mask_1)
    lstm_1_b = Bidirectional(GRU(units=hidden_size, return_sequences=False, name='RNN_1_b'))(lstm_1_a)
    """
    lstm_1_c = Bidirectional(GRU(units=hidden_size, return_sequences=False, name='RNN_1_c'))(lstm_1_b)
    """

    lstm_2_a = Bidirectional(GRU(units=hidden_size, return_sequences=True, name='RNN_2_a'))(text_mask_2)
    lstm_2_b = Bidirectional(GRU(units=hidden_size, return_sequences=False, name='RNN_2_b'))(lstm_2_a)
    """
    lstm_2_c = Bidirectional(GRU(units=hidden_size, return_sequences=False, name='RNN_2_c'))(lstm_2_b)
    """

    cosine_similarity = Dot(axes=1, normalize=True, name='cosine_similarity')([lstm_1_b, lstm_2_b])
    model = Model(inputs=[text_input_1, text_input_2], outputs=cosine_similarity)
    return model
def get(args):
    # get configuration

    # input ID sequence
    w_input = Input(shape=(1,), dtype='int32', name='input')
    # embedding sequence
    w = Embedding(input_dim=args.vocabulary_size, output_dim=args.embedding_size,
                  init='glorot_uniform')(w_input)

    # context
    c_input = Input(shape=(1,), dtype='int32', name='context')
    c = Embedding(input_dim=args.vocabulary_size, output_dim=args.embedding_size,
                  init='glorot_uniform')(c_input)

    # output (cos similarity)
    output_ = Dot(axes=2)([w, c])
    output_ = Reshape((1,), input_shape=(1, 1))(output_)
    output = Activation('sigmoid')(output_)

    # model
    SkipGram_model = Model(inputs=[w_input, c_input], outputs=output)

    # initialize the optimizer
    ADAM_ = Adam(lr=args.lr)

    # compile the SkipGram_model
    SkipGram_model.compile(loss='binary_crossentropy', optimizer=ADAM_)

    # save_sample_image the picture of the SkipGram_model
    print(time.strftime('%Y-%m-%d %H:%M:%S') + ' Save picture of SkipGram_model architecture')
    plot_model(SkipGram_model, show_shapes=True, to_file=args.model_picture)

    # show the information of the SkipGram_model
    print(time.strftime('%Y-%m-%d %H:%M:%S') + " Model summary")
    print(SkipGram_model.summary())

    # return the skip gram model
    return SkipGram_model