x_decoder = Convolution1D(256, kernel_size=3, activation='relu', padding='causal')(decoder_inputs) x_decoder = Convolution1D(256, kernel_size=3, activation='relu', padding='causal', dilation_rate=2)(x_decoder) x_decoder = Convolution1D(256, kernel_size=3, activation='relu', padding='causal', dilation_rate=4)(x_decoder) # Attention attention = Dot(axes=[2, 2])([x_decoder, x_encoder]) attention = Activation('softmax')(attention) context = Dot(axes=[2, 1])([attention, x_encoder]) decoder_combined_context = Concatenate(axis=-1)([context, x_decoder]) decoder_outputs = Convolution1D(64, kernel_size=3, activation='relu', padding='causal')(decoder_combined_context) decoder_outputs = Convolution1D(64, kernel_size=3, activation='relu', padding='causal')(decoder_outputs) # Output decoder_dense = Dense(num_decoder_tokens, activation='softmax')
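# A hedged completion of the snippet above: decoder_dense is defined but never applied,
# so this sketch wires it to the decoder stack and wraps everything in a Model. The names
# encoder_inputs / decoder_inputs are assumed to be the Input tensors that feed x_encoder
# and the causal decoder above; they are not shown in the snippet itself.
from keras.models import Model

decoder_outputs = decoder_dense(decoder_outputs)
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='adam', loss='categorical_crossentropy')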
def _get_outputs(self, inputs, emb): outputs = {} losses = {} if 'head' in self.params.targets: dep_arc_emb = Dropout(self.params.dense_droput)(Dense( self.params.head_hidden_size, activation='tanh')(emb)) head_arc_emb = Dropout(self.params.dense_droput)(Dense( self.params.head_hidden_size, activation='tanh')(emb)) head_pred = Dot(axes=2)([dep_arc_emb, head_arc_emb]) head_pred = Activation('softmax', name='head')(head_pred) outputs['head'] = head_pred losses['head'] = self.head_loss if 'deprel' in self.params.targets: dep_rel_emb = Dropout(self.params.dense_droput)(Dense( self.params.deprel_hidden_size, activation='tanh')(emb)) head_rel_emb = Dropout(self.params.dense_droput)(Dense( self.params.deprel_hidden_size, activation='tanh')(emb)) n_deprel = self.targets_factory.encoders['deprel'].vocab_size head_emb_T = Lambda(lambda x: K.permute_dimensions(x, (0, 2, 1)))( head_rel_emb) deprel_pred = Dot(axes=2)([head_pred, head_emb_T]) deprel_pred = Concatenate(axis=2)([deprel_pred, dep_rel_emb]) deprel_pred = Dropout(self.params.dense_droput)( Dense(n_deprel)(deprel_pred)) deprel_pred = Activation('softmax', name='deprel')(deprel_pred) outputs['deprel'] = deprel_pred losses['deprel'] = categorical_crossentropy if 'lemma' in self.params.targets: lemma_pred = TimeDistributed( LemmaModel(self.params, self.features_factory, self.targets_factory).model, name='lemma', )(Concatenate()([inputs['char'], emb])) outputs['lemma'] = lemma_pred losses['lemma'] = self.lemma_loss if 'xpostag' in self.params.targets: n_xpos = self.targets_factory.encoders['xpostag'].vocab_size xpos_pred = Dropout(self.params.dense_droput)(Dense( self.params.xpos_hidden_size, activation='tanh')(emb)) xpos_pred = Dropout(self.params.dense_droput)( Dense(n_xpos)(xpos_pred)) xpos_pred = Activation('softmax', name='xpostag')(xpos_pred) outputs['xpostag'] = xpos_pred losses['xpostag'] = categorical_crossentropy if 'upostag' in self.params.targets: n_pos = self.targets_factory.encoders['upostag'].vocab_size pos_pred = Dropout(self.params.dense_droput)(Dense( self.params.pos_hidden_size, activation='tanh')(emb)) pos_pred = Dropout(self.params.dense_droput)( Dense(n_pos)(pos_pred)) pos_pred = Activation('softmax', name='upostag')(pos_pred) outputs['upostag'] = pos_pred losses['upostag'] = categorical_crossentropy if 'feats' in self.params.targets: n_feat = self.targets_factory.encoders['feats'].vocab_size feat_pred = Dropout(self.params.dense_droput)(Dense( self.params.feat_hidden_size, activation='tanh')(emb)) feat_pred = Dropout(self.params.dense_droput, name='feats')(Dense(n_feat)(feat_pred)) outputs['feats'] = feat_pred losses['feats'] = self.feats_loss if 'sent' in self.params.targets: sent_pred = RemoveMask()(emb) sent_pred = GlobalMaxPooling1D()(sent_pred) outputs['sent'] = sent_pred losses['sent'] = None if 'semrel' in self.params.targets: n_semrel = self.targets_factory.encoders['semrel'].vocab_size semrel_pred = Dropout(self.params.dense_droput)(Dense( self.params.semrel_hidden_size, activation='tanh')(emb)) semrel_pred = Dropout(self.params.dense_droput)( Dense(n_semrel)(semrel_pred)) semrel_pred = Activation('softmax', name='semrel')(semrel_pred) outputs['semrel'] = semrel_pred losses['semrel'] = categorical_crossentropy return outputs, losses
def NeuralCollaborativeFiltering(n_users, n_items, n_factors, min_rating, max_rating): # Item Layer item_input = Input(shape=[1], name='Item') # Item Embedding MF item_embedding_mf = Embedding(n_items, n_factors, embeddings_regularizer=l2(1e-6), embeddings_initializer='he_normal', name='ItemEmbeddingMF')(item_input) item_vec_mf = Flatten(name='FlattenItemMF')(item_embedding_mf) # Item embedding MLP item_embedding_mlp = Embedding(n_items, n_factors, embeddings_regularizer=l2(1e-6), embeddings_initializer='he_normal', name='ItemEmbeddingMLP')(item_input) item_vec_mlp = Flatten(name='FlattenItemMLP')(item_embedding_mlp) # User Layer user_input = Input(shape=[1], name='User') # User Embedding MF user_embedding_mf = Embedding(n_users, n_factors, embeddings_regularizer=l2(1e-6), embeddings_initializer='he_normal', name='UserEmbeddingMF')(user_input) user_vec_mf = Flatten(name='FlattenUserMF')(user_embedding_mf) # User Embedding MLP user_embedding_mlp = Embedding(n_users, n_factors, embeddings_regularizer=l2(1e-6), embeddings_initializer='he_normal', name='UserEmbeddingMLP')(user_input) user_vec_mlp = Flatten(name='FlattenUserMLP')(user_embedding_mlp) # Dot product of MF paths DotProductMF = Dot(axes=1, name='DotProductMF')([item_vec_mf, user_vec_mf]) # Concat MLP paths ConcatMLP = Concatenate(name='ConcatMLP')([item_vec_mlp, user_vec_mlp]) # Use Dense to learn non-linear dense representation Dense_1 = Dense(50, name="Dense1")(ConcatMLP) Dense_2 = Dense(20, name="Dense2")(Dense_1) # Concatenate MF and MLP paths Concat = Concatenate(name="ConcatAll")([DotProductMF, Dense_2]) # Use Dense to learn non-linear dense representation Pred = Dense(1, name="Pred")(Concat) # Item Bias item_bias = Embedding(n_items, 1, embeddings_regularizer=l2(1e-5), name='ItemBias')(item_input) item_bias_vec = Flatten(name='FlattenItemBiasE')(item_bias) # User Bias user_bias = Embedding(n_users, 1, embeddings_regularizer=l2(1e-5), name='UserBias')(user_input) user_bias_vec = Flatten(name='FlattenUserBiasE')(user_bias) # Pred with bias added PredAddBias = Add(name="AddBias")([Pred, item_bias_vec, user_bias_vec]) # Scale prediction to the rating range y = Activation('sigmoid')(PredAddBias) rating_output = Lambda(lambda x: x * (max_rating - min_rating) + min_rating)(y) # Model Creation model = Model([user_input, item_input], rating_output, name="NeuralCF") # Compile Model model.compile(loss='mean_squared_error', optimizer="adam") return model
def negative_samples(input_length, input_dim, output_length, output_dim, hidden_dim, ns_amount, learning_rate, drop_rate): q_encoder_input = Input(shape=(input_length, input_dim)) r_decoder_input = Input(shape=(output_length, output_dim)) weight_data_r = Input(shape=(1, )) weight_data_w = Input(shape=(1, ns_amount)) weight_data_w_list = Lambda(lambda x: tf.split( x, num_or_size_splits=ns_amount, axis=2))(weight_data_w) fixed_r_decoder_input = adding_weight( output_length, output_dim)([r_decoder_input, weight_data_r]) w_decoder_input = Input(shape=(output_length, output_dim, ns_amount)) w_decoder_input_list = Lambda(lambda x: tf.split( x, num_or_size_splits=ns_amount, axis=3))(w_decoder_input) fixed_w_decoder_input = [] for i in range(ns_amount): w_decoder_input_list[i] = Reshape( (output_length, output_dim))(w_decoder_input_list[i]) weight_data_w_list[i] = Reshape((1, ))(weight_data_w_list[i]) fixed_w_decoder_input.append( adding_weight(output_length, output_dim)( [w_decoder_input_list[i], weight_data_w_list[i]])) encoder = Bidirectional(GRU(hidden_dim), merge_mode="ave", name="bidirectional1") q_encoder_output = encoder(q_encoder_input) q_encoder_output = Dropout(rate=drop_rate, name="dropout1")(q_encoder_output) decoder = Bidirectional(GRU(hidden_dim), merge_mode="ave", name="bidirectional2") r_decoder_output = decoder(fixed_r_decoder_input) r_decoder_output = Dropout(rate=drop_rate, name="dropout2")(r_decoder_output) output_vec = Concatenate( axis=1, name="dropout_con")([q_encoder_output, r_decoder_output]) output_hid = Dense(hidden_dim, name="output_hid")(output_vec) similarity = Dense(1, name="similarity")(output_hid) # Difference between kernel, bias, and activity regulizers in Keras # https://stats.stackexchange.com/questions/383310/difference-between-kernel-bias-and-activity-regulizers-in-keras w_decoder_output_list = [] for i in range(ns_amount): w_decoder_output = decoder(fixed_w_decoder_input[i]) w_decoder_output = Dropout(rate=drop_rate)(w_decoder_output) w_decoder_output_list.append(w_decoder_output) similarities = [similarity] for i in range(ns_amount): similarities.append( Dot(axes=1, normalize=True)([q_encoder_output, w_decoder_output_list[i]])) loss_data = Lambda(lambda x: loss_c(x))(similarities) model = Model([ q_encoder_input, r_decoder_input, w_decoder_input, weight_data_r, weight_data_w ], similarities[0]) ada = adam(lr=learning_rate) model.compile(optimizer=ada, loss=lambda y_true, y_pred: loss_data) return model
import os DIRNAME = os.path.dirname(__file__) DATA_PATH = os.path.join(DIRNAME, "../data/lfw-deepfunneled") people_dict = load_people_dict() X_train, y_train = load_data(os.path.join(DATA_PATH, "../pairsDevTrain.txt"), people_dict) X_val, y_val = load_data(os.path.join(DATA_PATH, "../pairsDevTest.txt"), people_dict) for name, merge in zip( ["Cat", "Add", "Sub", "Dot", "mult"], [Concatenate(), Add(), Subtract(), Dot(-1, True), Multiply()]): input_a = Input(shape=(2048, )) input_b = Input(shape=(2048, )) diff = merge([input_a, input_b]) x = Dense(512, activation="relu")(diff) # x = Dropout(0.5)(x) # x = Dense(512, activation="relu")(x) # x = Dropout(0.5)(x) # x = Dense(256, activation="relu")(x) # x = Dropout(0.5)(x) # x = Dense(128, activation="relu")(x) x = Dropout(0.2)(x) pred = Dense(1, activation="sigmoid")(x)
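    # A hedged completion of the loop body above (the original snippet stops at `pred`):
    # build and compile one small siamese head per merge layer and train it briefly.
    # It assumes X_train / X_val are [left_features, right_features] pairs of 2048-d
    # arrays, which is an assumption about load_data(), not something stated in the snippet.
    model = Model(inputs=[input_a, input_b], outputs=pred, name="pair_" + name)
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    model.fit(X_train, y_train, validation_data=(X_val, y_val),
              epochs=10, batch_size=64, verbose=2)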
def build_model(emb_matrix, max_sequence_length): ############# Embedding Process ############ # The embedding layer containing the word vectors emb_layer = Embedding(input_dim=emb_matrix.shape[0], output_dim=emb_matrix.shape[1], weights=[emb_matrix], input_length=max_sequence_length, trainable=False) # attention model ########## # Define inputs seq1 = Input(shape=(max_sequence_length, )) seq2 = Input(shape=(max_sequence_length, )) # Run inputs through embedding emb1 = emb_layer(seq1) emb2 = emb_layer(seq2) emb_distributed = TimeDistributed( Dense(200, activation='relu', kernel_regularizer=l2(1e-5), bias_regularizer=l2(1e-5))) emb1 = Dropout(0.4)(emb_distributed(emb1)) emb2 = Dropout(0.4)(emb_distributed(emb2)) # score each word and compute the score matrix F_seq1, F_seq2 = emb1, emb2 for i in range(2): scoreF = TimeDistributed( Dense(200, activation='relu', kernel_regularizer=l2(1e-5), bias_regularizer=l2(1e-5))) F_seq1 = Dropout(0.4)(scoreF(F_seq1)) F_seq2 = Dropout(0.4)(scoreF(F_seq2)) cross = Dot(axes=(2, 2))([F_seq1, F_seq2]) # normalize the score matrix, encode the premises and get the alignments c1 = Lambda(lambda x: keras.activations.softmax(x))(cross) c2 = Permute((2, 1))(cross) c2 = Lambda(lambda x: keras.activations.softmax(x))(c2) seq1Align = Dot((2, 1))([c1, emb2]) seq2Align = Dot((2, 1))([c2, emb1]) # Concatenate the original and aligned representations, then score each alignment pair seq1Align = concatenate([emb1, seq1Align]) seq2Align = concatenate([emb2, seq2Align]) for i in range(2): scoreG = TimeDistributed( Dense(200, activation='relu', kernel_regularizer=l2(1e-5), bias_regularizer=l2(1e-5))) seq1Align = scoreG(seq1Align) seq2Align = scoreG(seq2Align) seq1Align = Dropout(0.4)(seq1Align) seq2Align = Dropout(0.4)(seq2Align) # Sum these scores and make the final judgment from the summed score sumwords = Lambda(lambda x: K.reshape(K.sum(x, axis=1, keepdims=True), (-1, 200))) V_seq1 = sumwords(seq1Align) V_seq2 = sumwords(seq2Align) final = concatenate([V_seq1, V_seq2]) for i in range(2): final = Dense(200, activation='relu', kernel_regularizer=l2(1e-5), bias_regularizer=l2(1e-5))(final) final = Dropout(0.4)(final) final = BatchNormalization()(final) pred = Dense(1, activation='sigmoid')(final) # model = Model(inputs=[seq1, seq2, magic_input, distance_input], outputs=pred) model = Model(inputs=[seq1, seq2], outputs=pred) model.compile(loss='binary_crossentropy', optimizer='nadam', metrics=['acc']) print(model.summary()) return model
shared_conv2 = DF(input_shape=(5000, 1), emb_size=emb_size) anchor = Input((5000, 1), name='anchor') positive = Input((5000, 1), name='positive') negative = Input((5000, 1), name='negative') a = shared_conv2(anchor) p = shared_conv2(positive) n = shared_conv2(negative) # The Dot layer in Keras now supports built-in Cosine similarity using the normalize = True parameter. # From the Keras Docs: # keras.layers.Dot(axes, normalize=True) # normalize: Whether to L2-normalize samples along the dot product axis before taking the dot product. # If set to True, then the output of the dot product is the cosine proximity between the two samples. pos_sim = Dot(axes=-1, normalize=True)([a, p]) neg_sim = Dot(axes=-1, normalize=True)([a, n]) # customized loss loss = Lambda(cosine_triplet_loss, output_shape=(1, ))([pos_sim, neg_sim]) model_triplet = Model(inputs=[anchor, positive, negative], outputs=loss) print(model_triplet.summary()) opt = optimizers.SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True) model_triplet.compile(loss=identity_loss, optimizer=opt) batch_size = batch_size_value # At first epoch we don't generate hard triplets gen_hard = SemiHardTripletGenerator(Xa_train, Xp_train, batch_size, all_traces,
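# A small, hedged sanity check of the comment above (independent of the triplet model):
# with normalize=True the Dot layer returns the cosine similarity of its two inputs,
# which can be confirmed against a plain numpy computation on toy vectors.
import numpy as np
from keras.layers import Input, Dot
from keras.models import Model

va, vb = Input(shape=(4, )), Input(shape=(4, ))
cos_model = Model([va, vb], Dot(axes=-1, normalize=True)([va, vb]))
a_vec = np.array([[1.0, 2.0, 3.0, 4.0]])
b_vec = np.array([[2.0, 0.0, 1.0, 3.0]])
keras_cos = cos_model.predict([a_vec, b_vec])[0, 0]
numpy_cos = np.dot(a_vec[0], b_vec[0]) / (np.linalg.norm(a_vec[0]) * np.linalg.norm(b_vec[0]))
print(keras_cos, numpy_cos)  # the two values should agree up to float precision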
def build(self): ''' 1. Build Code Representation Model ''' logger.debug('Building Code Representation Model') methname = Input(shape=(self.data_params['methname_len'],), dtype='int32', name='methname') apiseq= Input(shape=(self.data_params['apiseq_len'],),dtype='int32',name='apiseq') tokens=Input(shape=(self.data_params['tokens_len'],),dtype='int32',name='tokens') sbt=Input(shape=(self.data_params['sbt_len'],),dtype='int32',name='sbt') ## method name representation ## #1.embedding init_emb_weights = np.load(self.config['workdir']+self.model_params['init_embed_weights_methname']) if self.model_params['init_embed_weights_methname'] is not None else None init_emb_weights = init_emb_weights if init_emb_weights is None else [init_emb_weights] embedding = Embedding(input_dim=self.data_params['n_methodname_words'], output_dim=self.model_params.get('n_embed_dims', 100), weights=init_emb_weights, mask_zero=False,#Whether 0 in the input is a special "padding" value that should be masked out. #If set True, all subsequent layers in the model must support masking, otherwise an exception will be raised. name='embedding_methname') methname_embedding = embedding(methname) dropout = Dropout(0.25,name='dropout_methname_embed') methname_dropout = dropout(methname_embedding) methname_out = AttentionLayer(name = 'methname_attention_layer')(methname_dropout) ## API Sequence Representation ## #1.embedding init_emb_weights = np.load(self.config['workdir']+self.model_params['init_embed_weights_api']) if self.model_params['init_embed_weights_api'] is not None else None init_emb_weights = init_emb_weights if init_emb_weights is None else [init_emb_weights] embedding = Embedding(input_dim=self.data_params['n_api_words'], output_dim=self.model_params.get('n_embed_dims', 100), #weights=weights, mask_zero=False,#Whether 0 in the input is a special "padding" value that should be masked out. #If set True, all subsequent layers must support masking, otherwise an exception will be raised. name='embedding_apiseq') apiseq_embedding = embedding(apiseq) dropout = Dropout(0.25,name='dropout_apiseq_embed') apiseq_dropout = dropout(apiseq_embedding) api_out = AttentionLayer(name = 'API_attention_layer')(apiseq_dropout) ## Tokens Representation ## #1.embedding init_emb_weights = np.load(self.config['workdir']+self.model_params['init_embed_weights_tokens']) if self.model_params['init_embed_weights_tokens'] is not None else None init_emb_weights = init_emb_weights if init_emb_weights is None else [init_emb_weights] embedding = Embedding(input_dim=self.data_params['n_tokens_words'], output_dim=self.model_params.get('n_embed_dims'), weights=init_emb_weights, mask_zero=False,#Whether 0 in the input is a special "padding" value that should be masked out. #If set True, all subsequent layers must support masking, otherwise an exception will be raised. 
name='embedding_tokens') tokens_embedding = embedding(tokens) dropout = Dropout(0.25,name='dropout_tokens_embed') tokens_dropout= dropout(tokens_embedding) tokens_out = AttentionLayer(name = 'Tokens_attention_layer')(tokens_dropout) ## Sbt Representation ## #1.embedding init_emb_weights = np.load(self.config['workdir']+self.model_params['init_embed_weights_sbt']) if self.model_params['init_embed_weights_sbt'] is not None else None init_emb_weights = init_emb_weights if init_emb_weights is None else [init_emb_weights] embedding = Embedding(input_dim=self.data_params['n_sbt_words'], output_dim=self.model_params.get('n_embed_dims'), weights=init_emb_weights, mask_zero=False,#Whether 0 in the input is a special "padding" value that should be masked out. #If set True, all subsequent layers must support masking, otherwise an exception will be raised. name='embedding_sbt') sbt_embedding = embedding(sbt) dropout = Dropout(0.25,name='dropout_sbt_embed') sbt_dropout= dropout(sbt_embedding) sbt_out = AttentionLayer(name = 'AST_attention_layer')(sbt_dropout) # merge code# merged_code= Concatenate(name='code_merge',axis=1)([methname_out,api_out,tokens_out,sbt_out]) #(122,200) ''' 2. Build Desc Representation Model ''' ## Desc Representation ## logger.debug('Building Desc Representation Model') desc = Input(shape=(self.data_params['desc_len'],), dtype='int32', name='desc') #1.embedding init_emb_weights = np.load(self.config['workdir']+self.model_params['init_embed_weights_desc']) if self.model_params['init_embed_weights_desc'] is not None else None init_emb_weights = init_emb_weights if init_emb_weights is None else [init_emb_weights] embedding = Embedding(input_dim=self.data_params['n_desc_words'], output_dim=self.model_params.get('n_embed_dims'), weights=init_emb_weights, mask_zero=False,#Whether 0 in the input is a special "padding" value that should be masked out. #If set True, all subsequent layers must support masking, otherwise an exception will be raised. 
name='embedding_desc') desc_embedding = embedding(desc) dropout = Dropout(0.25,name='dropout_desc_embed') desc_dropout = dropout(desc_embedding) merged_desc = AttentionLayer(name = 'desc_attention_layer')(desc_dropout) #AP networks# attention = COAttentionLayer(name='coattention_layer') # (122,60) attention_out = attention([merged_code,merged_desc]) # out_1 column wise gmp_1=GlobalMaxPooling1D(name='blobalmaxpool_colum') att_1=gmp_1(attention_out) activ1=Activation('softmax',name='AP_active_colum') att_1_next=activ1(att_1) dot1=Dot(axes=1,normalize=False,name='column_dot') desc_out = dot1([att_1_next, merged_desc]) # out_2 row wise attention_trans_layer = Lambda(lambda x: K.permute_dimensions(x, (0,2,1)),name='trans_coattention') attention_transposed = attention_trans_layer(attention_out) gmp_2=GlobalMaxPooling1D(name='blobalmaxpool_row') att_2=gmp_2(attention_transposed) activ2=Activation('softmax',name='AP_active_row') att_2_next=activ2(att_2) dot2=Dot(axes=1,normalize=False,name='row_dot') code_out = dot2([att_2_next, merged_code]) self._code_repr_model=Model(inputs=[methname, apiseq,tokens,sbt,desc],outputs=[code_out],name='desc_repr_model') # self._desc_repr_model=desc_repr_model print('\nsummary of code representation model') self._code_repr_model.summary() fname=self.config['workdir']+'models/'+self.model_params['model_name']+'/_desc_repr_model.png' self._desc_repr_model=Model(inputs=[methname,apiseq,tokens,sbt,desc],outputs=[desc_out],name='code_repr_model') # self._code_repr_model=code_repr_model print('\nsummary of description representation model') self._desc_repr_model.summary() """ 3: calculate the cosine similarity between code and desc """ logger.debug('Building similarity model') code_repr=self._code_repr_model([methname,apiseq,tokens,sbt,desc]) desc_repr=self._desc_repr_model([methname,apiseq,tokens,sbt,desc]) cos_sim=Dot(axes=1, normalize=True, name='cos_sim')([code_repr, desc_repr]) sim_model = Model(inputs=[methname,apiseq,tokens,sbt,desc], outputs=[cos_sim],name='sim_model') self._sim_model=sim_model #for model evaluation print ("\nsummary of similarity model") self._sim_model.summary() fname=self.config['workdir']+'models/'+self.model_params['model_name']+'/_sim_model.png' #plot_model(self._sim_model, show_shapes=True, to_file=fname) ''' 4:Build training model ''' good_sim = sim_model([self.methname,self.apiseq,self.tokens,self.sbt, self.desc_good])# similarity of good output bad_sim = sim_model([self.methname,self.apiseq,self.tokens,self.sbt, self.desc_bad])#similarity of bad output loss = Lambda(lambda x: K.maximum(1e-6, self.model_params['margin'] - x[0] + x[1]), output_shape=lambda x: x[0], name='loss')([good_sim, bad_sim]) logger.debug('Building training model') self._training_model=Model(inputs=[self.methname,self.apiseq,self.tokens,self.sbt, self.desc_good,self.desc_bad], outputs=[loss],name='training_model') print ('\nsummary of training model') self._training_model.summary() fname=self.config['workdir']+'models/'+self.model_params['model_name']+'/_training_model.png'
def __init__(self, texts_size=400, embedding_size=300, vocab_size=60230, texts_autoencoder_units=60, leaves_size=300, leaves_autoencoder_units=30, conv2d_filters=10, conv2d_kernel_size=3): # texts autoencoder embedding_matrix = load_embedding_matrix() texts_in = Input(shape=(texts_size, ), dtype='int32', name='texts_in') embedded_texts = Embedding(output_dim=embedding_size, input_dim=vocab_size, input_length=texts_size, weights=[embedding_matrix], trainable=True)(texts_in) texts_encoder_out = LSTM(texts_autoencoder_units, activation='tanh', input_shape=(texts_size, embedding_size), name='texts_encoder_out')(embedded_texts) # texts_encoder_out = BatchNormalization()(texts_encoder_out) self.texts_encoder = Model(inputs=texts_in, outputs=texts_encoder_out) self.texts_encoder.compile(optimizer='adam', loss='mse') hidden = RepeatVector(texts_size)(texts_encoder_out) hidden = LSTM(texts_autoencoder_units, activation='relu', return_sequences=True)(hidden) texts_out = TimeDistributed(Dense(units=embedding_size), name='texts_decoder_out')(hidden) # texts_out = Flatten(name='texts_decoder_out')(texts_out) # leaves autoencoder leaves_in = Input(shape=(leaves_size, ), dtype='float32', name='leaves_in') leaves_encoder_out = Dense(units=leaves_autoencoder_units, activation='relu', name='leaves_encoder_out')(leaves_in) leaves_encoder_bn = BatchNormalization()(leaves_encoder_out) self.leaves_encoder = Model(inputs=leaves_in, outputs=leaves_encoder_out, name='leaves_encoder') self.leaves_encoder.compile(optimizer='adam', loss='mse') leaves_out = Dense(units=leaves_size, activation='relu', name='leaves_decoder_out')(leaves_encoder_bn) # attention-mechanism based translation # key = Dense(units=texts_autoencoder_units, name='key')(texts_encoder_out) key = texts_encoder_out expanded_key = Lambda(lambda x: K.expand_dims(x, axis=-1))(key) # value = Dense(units=texts_autoencoder_units, name='value')(texts_encoder_out) value = texts_encoder_out repeated_value = RepeatVector(n=leaves_autoencoder_units)(value) # query = Dense(units=leaves_autoencoder_units, name='query')(leaves_encoder_out) query = leaves_encoder_bn expanded_query = Lambda(lambda x: K.expand_dims(x, axis=-1))(query) attention = Dot(axes=[2, 2], normalize=True)([expanded_query, expanded_key]) attention = Activation(activation='softmax', name='attention_weight')(attention) self.attention_model = Model(inputs=[texts_in, leaves_in], outputs=[attention], name='attention_model') weighted_value = Multiply()([attention, repeated_value]) context = Lambda(lambda x: K.sum(x, axis=-1, keepdims=False))( weighted_value) gen_leaves = Dense(units=leaves_size, activation='relu', name='gen_leaves')(context) # joint representation: cooccurrence matrix value_probability = Softmax(axis=-1, name='value_probability')(weighted_value) repeated_query = Lambda(lambda x: K.repeat_elements( x, rep=texts_autoencoder_units, axis=-1))(expanded_query) query_probability = Softmax(axis=1, name='query_probability')(repeated_query) cooccurrence = Multiply(name='cooccurrence')( [value_probability, query_probability]) # cooccurrence matrix is used for classification cooccurrence = Lambda(lambda x: K.expand_dims(x, axis=-1))( cooccurrence) hidden = Conv2D(filters=conv2d_filters, kernel_size=conv2d_kernel_size, strides=(1, 1), padding='valid', dilation_rate=(2, 2), activation='relu')(cooccurrence) hidden = MaxPool2D()(hidden) hidden = BatchNormalization()(hidden) hidden = Conv2D(filters=conv2d_filters, kernel_size=conv2d_kernel_size, strides=(1, 1), padding='valid', dilation_rate=(2, 2), 
activation='relu')(hidden) hidden = MaxPool2D()(hidden) hidden = BatchNormalization()(hidden) hidden = Flatten()(hidden) hidden = Dense(units=conv2d_filters, activation='relu')(hidden) hidden = BatchNormalization()(hidden) final_output = Dense(units=2, activation='softmax', name='final_output')(hidden) self.classification = Model(inputs=[texts_in, leaves_in], outputs=[final_output]) self.classification.compile(optimizer='adam', loss=binary_crossentropy) # the whole model self.whole_model = Model( inputs=[texts_in, leaves_in], outputs=[texts_out, leaves_out, gen_leaves, final_output], name='whole_model') self.whole_model.compile(optimizer='adam', loss={ 'gen_leaves': mse, 'texts_decoder_out': mse, 'leaves_decoder_out': mse, 'final_output': binary_crossentropy }, loss_weights={ 'gen_leaves': 0.5, 'texts_decoder_out': 0.25, 'leaves_decoder_out': 0.25, 'final_output': 1. }) plot_model(self.whole_model, show_shapes=True, to_file='whole_model.png')
def create_model(self, embedding_dimensions, lstm_dimensions, dense_dimensions, optimizer, embeddings=None, embeddings_trainable=True): """ creates the neural network model, optionally using precomputed embeddings applied to the training data :return: """ num_words = len(self.tokenizer.word_index) logger.info('Creating a model based on %s unique tokens.', num_words) # create the shared embedding layer (with or without pre-trained weights) embedding_shared = None if embeddings is None: embedding_shared = Embedding(num_words + 1, embedding_dimensions, input_length=None, mask_zero=True, trainable=embeddings_trainable, name="embedding_shared") else: logger.info('Importing pre-trained word embeddings.') embeddings_index = load_embeddings(embeddings) # indices in word_index start with a 1, 0 is reserved for masking padded value embedding_matrix = np.zeros((num_words + 1, embedding_dimensions)) for word, i in self.tokenizer.word_index.items(): embedding_vector = embeddings_index.get(word) if embedding_vector is not None: # words not found in embedding index will be all-zeros. embedding_matrix[i] = embedding_vector else: logger.warning('Word not found in embeddings: %s', word) embedding_shared = Embedding( num_words + 1, embedding_dimensions, input_length=None, mask_zero=True, trainable=embeddings_trainable, weights=[embedding_matrix], name="embedding_shared") input_state = Input(batch_shape=(None, None), name="input_state") input_action = Input(batch_shape=(None, None), name="input_action") embedding_state = embedding_shared(input_state) embedding_action = embedding_shared(input_action) lstm_shared = LSTM(lstm_dimensions, name="lstm_shared") lstm_state = lstm_shared(embedding_state) lstm_action = lstm_shared(embedding_action) dense_state = Dense(dense_dimensions, activation='tanh', name="dense_state")(lstm_state) dense_action = Dense(dense_dimensions, activation='tanh', name="dense_action")(lstm_action) model_state = Model(inputs=input_state, outputs=dense_state, name="state") model_action = Model(inputs=input_action, outputs=dense_action, name="action") self.model_state = model_state self.model_action = model_action input_dot_state = Input(shape=(dense_dimensions, )) input_dot_action = Input(shape=(dense_dimensions, )) dot_state_action = Dot(axes=-1, normalize=True, name="dot_state_action")( [input_dot_state, input_dot_action]) model_dot_state_action = Model( inputs=[input_dot_state, input_dot_action], outputs=dot_state_action, name="dot_state_action") self.model_dot_state_action = model_dot_state_action model = Model(inputs=[model_state.input, model_action.input], outputs=model_dot_state_action( [model_state.output, model_action.output]), name="model") model.compile(optimizer=optimizer, loss='mse') self.model = model print('---------------') print('Complete model:') model.summary() print('---------------')
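# A hedged usage sketch for the model built by create_model(): score one state/action
# pair. `agent` stands for an instance of the surrounding class after create_model()
# has been called; the instance name and the example strings are illustrative only.
import numpy as np

state_seq = np.array(agent.tokenizer.texts_to_sequences(["you are in the kitchen"]))
action_seq = np.array(agent.tokenizer.texts_to_sequences(["open the fridge"]))
q_value = agent.model.predict([state_seq, action_seq])  # cosine-based score in [-1, 1]
print(q_value)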
def encoder_attention(self, encoder_inputs, enc_attn, init_states): # input : encoder_inputs [batch_size, time_steps, input_dim], # enc_attn :attentin weight for the first timestep (ones matrix by default) # init_states: initial states for encoder (zeros matrix by default) # return : encoder_output, encoder_state, encoder_att [h, s] = init_states encoder_att = [] encoder_output = [] global_att = self.global_att aux_att = self.aux_att #get input if global_att: local_inputs = encoder_inputs[0] global_inputs = encoder_inputs[1] else: local_inputs = encoder_inputs #get att states if global_att: local_attn = enc_attn[0] global_attn = enc_attn[1] else: local_attn = enc_attn #local attention #shared layer AddLayer = Add(name='add') PermuteLayer = Permute(dims=(2, 1)) ActTanh = Activation(activation='tanh', name='tanh_for_e') ActSoftmax = Activation(activation='softmax', name='softmax_for_alpha') def local_attention(states, step): #for attention query #linear map Ux = self.Ue(PermuteLayer(local_inputs)) #[none,input_dim,T] states = Concatenate(axis=1, name='state_{}'.format(step))(states) Whs = self.We(states) #[none, T] Whs = RepeatVector(local_inputs.shape[2])(Whs) #[none,input_dim,T] y = AddLayer([Ux, Whs]) e = self.Ve(ActTanh(y)) #[none,input_dim,1] e = PermuteLayer(e) #[none,1,input_dim] alpha = ActSoftmax(e) return alpha AddLayer2 = Add(name='add2') PermuteLayer2 = Permute(dims=(2, 1)) ActTanh2 = Activation(activation='tanh', name='tanh_for_e2') ActSoftmax2 = Activation(activation='softmax', name='softmax_for_beta') def global_attention(states, step, prior): #for global attention query #global inputs[0](values) [none, T, neighbornum] #linear map Wg states = Concatenate(axis=1, name='state_gl_{}'.format(step))(states) Wgs = self.Wg(states) #[none,T] Ugy = self.Ug(PermuteLayer2( global_inputs[0])) # [none, neighbornum, T] Wgs_ = RepeatVector(global_inputs[0].shape[2])( Wgs) # [none, neighbornum, T] y2 = AddLayer2([Wgs_, Ugy]) g = self.Vg(ActTanh2(y2)) g = PermuteLayer2(g) g_ = Lambda(lambda x: (1 - self.lamb) * x + self.lamb * prior)(g) beta = ActSoftmax2(g) return beta for t in range(self.T): if global_att: if aux_att: x = Lambda(lambda x: x[:, t, :], name='X_local_{}'.format(t))( local_inputs) #[none,input_dim] x = RepeatVector(1)( x) #[none,1,input_dim] , 1 denotes one time step [global_input_value, global_input_weight] = global_inputs x2 = Lambda(lambda x2: x2[:, t, :], name='X_global_{}'.format(t))( global_input_value) #[none,neighbornum] x2 = RepeatVector(1)( x2) #[none,1,neighbornum] , 1 denotes one time step prior = Lambda( lambda p: p[:, t, :], name='global_prior_{}'.format(t))( global_input_weight) #[none,neighbornum] prior = RepeatVector(1)(prior) local_x = Multiply(name='Xatt_local_{}'.format(t))( [local_attn, x]) #[none,1,input_dim] print('global_attn:', global_attn, 'x2:', x2) global_x = Dot(axes=(2), name='Xatt_global_{}'.format(t))( [global_attn, x2]) #global_x = Multiply(name = 'Xatt_global_{}'.format(t))([global_attn, x2]) att_x = Concatenate(axis=-1)([local_x, global_x]) o, h, s = self.enLSTM( att_x, initial_state=[h, s]) #o, h, s [none, hidden_dim] o = RepeatVector(1)(o) encoder_output.append(o) local_attn = local_attention([h, s], t + 1) global_attn = global_attention([h, s], t + 1, prior) encoder_att.append([local_attn, global_attn]) elif not aux_att: x = Lambda(lambda x: x[:, t, :], name='X_local_{}'.format(t))( local_inputs) #[none,input_dim] x = RepeatVector(1)( x) #[none,1,input_dim] , 1 denotes one time step [global_input_value, global_input_weight] = global_inputs 
x2 = Lambda(lambda x2: x2[:, t, :], name='X_global_{}'.format(t))( global_input_value) #[none,neighbornum] x2 = RepeatVector(1)( x2) #[none,1,neighbornum] , 1 denotes one time step prior = Lambda( lambda p: p[:, t, :], name='global_prior_{}'.format(t))( global_input_weight) #[none,neighbornum] prior = RepeatVector(1)(prior) #global_x = Multiply(name = 'Xatt_global_{}'.format(t))([global_attn, x2]) global_x = Dot(axes=(2), name='Xatt_global_{}'.format(t))( [global_attn, x2]) att_x = Concatenate(axis=-1)([x, global_x]) o, h, s = self.enLSTM( att_x, initial_state=[h, s]) #o, h, s [none, hidden_dim] o = RepeatVector(1)(o) encoder_output.append(o) global_attn = global_attention([h, s], t + 1, prior) encoder_att.append(global_attn) elif not global_att: if aux_att: x = Lambda(lambda x: x[:, t, :], name='X_{}'.format(t))( local_inputs) #[none,input_dim] x = RepeatVector(1)( x) #[none,1,input_dim] , 1 denotes one time step local_x = Multiply(name='Xatt_{}'.format(t))( [local_attn, x]) #[none,1,input_dim] o, h, s = self.enLSTM( local_x, initial_state=[h, s]) #o, h, s [none, hidden_dim] o = RepeatVector(1)(o) encoder_output.append(o) local_attn = local_attention([h, s], t + 1) encoder_att.append(local_attn) elif not aux_att: x = Lambda(lambda x: x[:, t, :], name='X_{}'.format(t))( local_inputs) #[none,input_dim] x = RepeatVector(1)( x) #[none,1,input_dim] , 1 denotes one time step o, h, s = self.enLSTM( x, initial_state=[h, s]) #o, h, s [none, hidden_dim] o = RepeatVector(1)(o) encoder_output.append(o) if global_att and aux_att: local_att = [i[0] for i in encoder_att] print('local_att', local_att) local_att = Concatenate(axis=1)(local_att) global_att = [i[1] for i in encoder_att] print('global_att', global_att) #global_att = Concatenate(axis = 1)(global_att) global_att = Lambda(lambda x: K.concatenate(x, axis=1))(global_att) encoder_att = [local_att, global_att] #elif global_att: # encoder_att = Concatenate(axis = 1,name = 'encoder_att')(encoder_att) #[none, T, input_dim] encoder_output = Concatenate(axis=1, name='encoder_output')(encoder_output) return encoder_output, [h, s], encoder_att
train_x, train_y, valid_x, valid_y, test_x, test_y, minimax_scaler = utils.get_data_attention( 'pollution.csv', 8, tx, ty) no_features = 8 # Defined shared layers as global variables repeator = RepeatVector(tx) concatenator = Concatenate(axis=-1) # densor1 = Dense(10, activation="tanh") densor2 = Dense(1, kernel_initializer="glorot_normal", bias_initializer='zeros') activator = Activation( softmax, name='attention_weights') # We are using a custom softmax(axis = 1) loaded # in this notebook dot_operator = Dot(axes=1) def one_step_attention(a, s_prev): """ Performs one step of attention: Outputs a context vector computed as a dot product of the attention weights "alphas" and the hidden states "a" of the Bi-LSTM. Arguments: a -- hidden state output of the Bi-LSTM, numpy-array of shape (m, Tx, 2*n_a) s_prev -- previous hidden state of the (post-attention) LSTM, numpy-array of shape (m, n_s) Returns: context -- context vector, input of the next (post-attention) LSTM cell """
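    # The function body is cut off above; a hedged sketch of the usual implementation,
    # mirroring one_step_attention in the date-conversion snippet further down and using
    # the shared layers defined here (repeator, concatenator, densor2, activator,
    # dot_operator):
    s_prev = repeator(s_prev)             # (m, tx, n_s)
    concat = concatenator([a, s_prev])    # (m, tx, 2*n_a + n_s)
    energies = densor2(concat)            # (m, tx, 1)
    alphas = activator(energies)          # attention weights over the tx time steps
    context = dot_operator([alphas, a])   # (m, 1, 2*n_a) weighted sum of hidden states
    return context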
label2id = {l:i for i,l in enumerate(set(labels))} id2label = {v:k for k,v in label2id.items()} print(id2label) y = [label2id[label] for label in labels] y = to_categorical(y, num_classes=len(label2id), dtype='float32') seq_input = Input(shape=(max_len, ), dtype='int32') embedded = Embedding(vocab_size, embedding_dim, input_length=max_len)(seq_input) embedded = Dropout(0.2)(embedded) lstm = Bidirectional(LSTM(embedding_dim, return_sequences=True))(embedded) lstm = Dropout(0.2)(lstm) att_vector = TimeDistributed(Dense(1))(lstm) att_vector = Reshape((max_len, ))(att_vector) att_vector = Activation('softmax', name = 'attention_layer')(att_vector) att_output = Dot(axes=1)([lstm, att_vector]) fc = Dense(embedding_dim, activation='relu')(att_output) output = Dense(len(label2id), activation='softmax')(fc) model = Model(inputs = [seq_input], outputs = output) print(model.summary()) model.compile(loss='categorical_crossentropy', metrics = ['accuracy'], optimizer='adam') model.fit(x_pad, y, epochs=2, batch_size=64, validation_split=0.2, shuffle=True, verbose=2) model_att = Model(inputs=model.input, outputs=[model.output, model.get_layer('attention_layer').output]) sample_text = random.choice(df['text'].values.tolist()) tokenized_sample = sample_text.split(" ") encoded_sample = [[word2id[word] for word in tokenized_sample]]
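# A hedged continuation of the snippet above: run model_att on the encoded sample and
# pair each token with its attention weight. pad_sequences is the standard Keras helper;
# the default 'pre' padding (assumed to match how x_pad was built) keeps the real tokens
# at the end of the padded sequence.
from keras.preprocessing.sequence import pad_sequences

padded_sample = pad_sequences(encoded_sample, maxlen=max_len)
label_probs, attentions = model_att.predict(padded_sample)
for token, weight in zip(tokenized_sample, attentions[0][-len(tokenized_sample):]):
    print(token, round(float(weight), 4))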
n_users = len(dataset.user_id.unique()) n_books = len(dataset.book_id.unique()) # creating book embedding path book_input = Input(shape=[1], name="Book-Input") book_embedding = Embedding(n_books+1, 5, name="Book-Embedding")(book_input) book_vec = Flatten(name="Flatten-Books")(book_embedding) # creating user embedding path user_input = Input(shape=[1], name="User-Input") user_embedding = Embedding(n_users+1, 5, name="User-Embedding")(user_input) user_vec = Flatten(name="Flatten-Users")(user_embedding) conc = Concatenate()([book_vec, user_vec]) # performing dot product and creating model prod = Dot(name="Dot-Product", axes=1)([book_vec, user_vec]) model = Model([user_input, book_input], prod) model.compile('adam', 'mean_squared_error') from keras.models import load_model if os.path.exists('regression_model.h5'): model = load_model('regression_model.h5') else: history = model.fit([train.user_id, train.book_id], train.rating, epochs=5, verbose=1) model.save('regression_model.h5') plt.plot(history.history['loss']) plt.xlabel("Epochs") plt.ylabel("Training Error")
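# A hedged sketch of using the trained dot-product model for recommendation: score every
# book for one user and keep the highest predictions. The user id and top-10 cut-off are
# illustrative only.
import numpy as np

user_id = 42
book_ids = np.arange(1, n_books + 1).reshape(-1, 1)
user_ids = np.full_like(book_ids, user_id)
scores = model.predict([user_ids, book_ids]).flatten()
top10 = book_ids.flatten()[np.argsort(scores)[::-1][:10]]
print(top10)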
N_HEADS, FILTERS, D_ATTENTION, D_ATTENTION, FILTERS, LAYER_DROPOUT, name="context_eeb")(highway_context) ## Context question attention concat = Concatenate(axis=1)([context_ff, question_ff]) lambda_concat = Lambda(attention)(concat) attention_dense = TimeDistributed( Dense(1, kernel_regularizer=l2(L2_REG), use_bias=False))(lambda_concat) attention_matrix = Reshape((MAX_CONTEXT, MAX_QUESTIONS))(attention_dense) attention_matrix_bar = Softmax()(attention_matrix) A = Dot(axes=(2, 1))([attention_matrix_bar, question_ff]) attention_matrix_transpose = Lambda( lambda x: K.permute_dimensions(x, (0, 2, 1)))(attention_matrix) attention_matrix_bar_bar = Softmax()(attention_matrix_transpose) B = Dot(axes=(2, 1))([attention_matrix_bar, attention_matrix_bar_bar]) B = Dot(axes=(2, 1))([B, context_ff]) ## Stacked model encoder blocks. A_attention = Multiply()([context_ff, A]) B_attention = Multiply()([context_ff, B]) stacked_blocks_input = Concatenate(axis=2)( [context_ff, A, A_attention, B_attention]) stacked_blocks_input = Dropout(LAYER_DROPOUT)
u=Input(shape=(1,),name='user') m=Input(shape=(1,),name='movie') r=Embedding(input_dim=int(np.amax(train[:,1])+1),input_length=1,output_dim=40,name='user_em')(u) p=Embedding(input_dim=int(np.amax(train[:,2])+1),input_length=1,output_dim=40,name='movie_em')(m) ''' cat=Concatenate()([r,p]) d=Dense(units=100,use_bias=True,activation='relu')(cat) output=Dense(units=1,use_bias=True,activation='sigmoid')(d) ''' r=Flatten()(r) p=Flatten()(p) dot=Dot(axes=1,name='dot')([r,p]) r=Dense(units=40,activation='relu')(r) p=Dense(units=40,activation='relu')(p) cat=Concatenate()([r,p]) bias=Dense(units=1,activation='relu')(cat) add=Add()([bias,dot]) # o=Flatten()(add) out=Dense(units=1,activation='relu')(add) # cat=Concatenate()([rb,pb,dot]) #out=Dense(units=1,activation='relu')(pb) model=Model(inputs=[u,m],outputs=[add]) model.compile(optimizer='adam',loss='mse',metrics=['accuracy'])
def L2X(datatype, train=True): # the whole thing is equation (5) x_train, y_train, x_val, y_val, datatype_val, input_shape = create_data( datatype, n=int(1e6)) st1 = time.time() st2 = st1 print(input_shape) activation = 'relu' # P(S|X) we train the model on this, for capturing the important features. model_input = Input(shape=(input_shape, ), dtype='float32') net = Dense(100, activation=activation, name='s/dense1', kernel_regularizer=regularizers.l2(1e-3))(model_input) net = Dense(100, activation=activation, name='s/dense2', kernel_regularizer=regularizers.l2(1e-3))(net) # A tensor of shape, [batch_size, max_sents, 100] mid_dim = input_shape * num_groups logits = Dense(mid_dim)(net) # [BATCH_SIZE, max_sents, 1] k = ks[datatype] tau = 0.1 samples = Sample_Concrete(tau, k, input_shape, num_groups, name='sample')(logits) samples = Reshape((input_shape, num_groups))(samples) samples = Permute((2, 1))(samples) def matmul_output_shape(input_shapes): shape1 = list(input_shapes[0]) shape2 = list(input_shapes[1]) return tuple((shape1[0], shape1[1])) matmul_layer = Lambda(lambda x: K.batch_dot(x[0], x[1]), output_shape=matmul_output_shape) new_model_input = matmul_layer([samples, model_input]) net2_list = [] # pdb.set_trace() for i in range(num_groups): temp = Lambda(lambda x: x[:, i, :], output_shape=lambda in_shape: (in_shape[0], in_shape[2]))(samples) temp2 = Lambda(lambda x: x[:, i, :] / (tf.math.maximum( tf.reduce_sum(x[:, i, :], axis=1, keepdims=True), 1e-12, name=None) ), output_shape=lambda in_shape: (in_shape[0], in_shape[2]))(samples) tau1 = 0.1 k = 1 x2 = Dot(axes=1, normalize=False)([model_input, temp]) x2d = RepeatVector(input_shape)(x2) x2d = Reshape((input_shape, ))(x2d) new2_temp = Multiply()([x2d, temp2]) net2_list.append(new2_temp) New_prime = Add()(net2_list) net = Dense(100, activation=activation, name='dense1', kernel_regularizer=regularizers.l2(1e-3))(New_prime) net = BatchNormalization()(net) # Add batchnorm for stability. net = Dense(100, activation=activation, name='dense2', kernel_regularizer=regularizers.l2(1e-3))(net) net = BatchNormalization()(net) preds = Dense(2, activation='softmax', name='dense4', kernel_regularizer=regularizers.l2(1e-3))(net) #### HERE IS FOR ANOTHER BRANCH I(Xg;X) activation = 'linear' model = Model(inputs=model_input, outputs=[preds, New_prime]) model.summary() if train: adam = optimizers.Adam(lr=1e-3) #### HERE CHANGE THE PROPORTION OF 2 WEIGHTS l1 = 15.0 l2 = 1.0 model.compile(loss=['categorical_crossentropy', 'mse'], loss_weights=[l1, l2], optimizer=adam, metrics=['acc']) filepath = "models/{}/L2X.hdf5".format(datatype) checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max') callbacks_list = [checkpoint] model.fit(x_train, [y_train, x_train], validation_data=(x_val, [y_val, x_val]), callbacks=callbacks_list, epochs=2, batch_size=BATCH_SIZE) st2 = time.time() else: model.load_weights('models/{}/L2X.hdf5'.format(datatype), by_name=True) pred_model = Model(model_input, [samples, preds]) pred_model.compile(loss=None, optimizer='rmsprop', metrics=[None]) # For now samples is a matrix instead of a vector scores, preds = pred_model.predict(x_val, verbose=1, batch_size=BATCH_SIZE) # We need to write a new compute_median_rank to do analysis # median_ranks = compute_median_rank(scores, k = ks[datatype], # datatype_val=datatype_val) median_ranks = compute_groups(scores) return median_ranks, time.time( ) - st2, st2 - st1, scores, x_val, y_val, datatype_val, preds
def single_layer_mf_image_withbias(n_users, n_items, n_factors): item_input = Input(shape=[1]) item_embedding = Embedding(n_items, n_factors, embeddings_regularizer=l2(1e-5))(item_input) item_vec = Flatten()(item_embedding) image_input = Input(shape=(224, 224, 3)) imgflow = tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation='relu')(image_input) imgflow = tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation='relu')(imgflow) imgflow = MaxPooling2D(pool_size=(4, 4))(imgflow) imgflow = Dropout(0.25)(imgflow) imgflow = tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation='relu')(imgflow) imgflow = MaxPooling2D(pool_size=(4, 4))(imgflow) imgflow = Dropout(0.25)(imgflow) imgflow = tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation='relu')(imgflow) imgflow = Dropout(0.25)(imgflow) imgflow = tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation='relu')(imgflow) imgflow = Dropout(0.25)(imgflow) imgflow = Flatten()(imgflow) imgflow = Dense(512, activation='relu')(imgflow) imgflow = BatchNormalization()(imgflow) imgflow = Dense(256, activation='relu')(imgflow) imgflow = BatchNormalization()(imgflow) imgflow = Dense(128, activation='relu')(imgflow) Concat = tf.keras.layers.concatenate(inputs=[item_vec, imgflow], axis=1) Dense_1 = Dense(n_factors, kernel_initializer='glorot_normal')(Concat) item_bias = Embedding(n_items, 1, embeddings_regularizer=l2(1e-6))(item_input) item_bias_vec = Flatten()(item_bias) user_input = Input(shape=[1]) user_embedding = Embedding(n_users, n_factors, embeddings_regularizer=l2(1e-6))(user_input) user_vec = Flatten()(user_embedding) user_bias = Embedding(n_users, 1, embeddings_regularizer=l2(1e-6))(user_input) user_bias_vec = Flatten()(user_bias) DotProduct = Dot(axes=1)([Dense_1, user_vec]) AddBias = Add()([DotProduct, item_bias_vec, user_bias_vec]) y = Activation('sigmoid')(AddBias) rating_output = Lambda(lambda x: x * (max_rating - min_rating) + min_rating)(y) model = Model([user_input, item_input, image_input], rating_output) model.compile(loss='mean_squared_error', optimizer=Adam(lr=0.001)) return model
def singleModel(ss_story_maxlen, ss_story_maxsents, ss_question_maxlen, ss_vocab_size, ss_stories_train, ss_questions_train, ss_answers_train, ss_stories_test, ss_questions_test, ss_answers_test, embedding_dim, num_epochs, batch_size): ''' This function takes in training data and testing data for stories, question and answers, max lengths of story and question, maximum sentence in story, vocab size, embedding dimension, number of epochs and batch size. Returns the models and debugging models for single fact problem. Parameters: ss_story_maxlen (int) : The maximum number of words in sentences in the story ss_story_maxsents (int) : The maximum number of sentences in the story ss_question_maxlen (int) : The maximum number of question ss_vocab_size (int) : The size of the vocabulary ss_stories_train, ss_questions_train, ss_answers_train (numpy array) : A list of padded and vectorized stories, question and answers of training set ss_stories_test, ss_questions_test, ss_answers_test (numpy array) : A list of padded and vectorized stories, question and answers of testing set embedding_dim (int) : The size of embedding num_epochs (int) : The number of epochs batch_size (int) : The size of mini batches Returns: single_model (keras model) : The model trained on Single Fact Dataset single_debug_model (keras model) : The debug model for Single Fact Dataset ''' input_story = Input(shape = (ss_story_maxsents, ss_story_maxlen)) embedded_story = Embedding(ss_vocab_size, embedding_dim)(input_story) summed_across_words_story = Lambda(lambda x: K.sum(x, axis = 2))(embedded_story) # print(summed_across_words_story.shape) input_question = Input(shape = (ss_question_maxlen,)) embedded_question = Embedding(ss_vocab_size, embedding_dim)(input_question) # print(embedded_question.shape) summed_across_words_question = Lambda(lambda x: K.sum(x, axis = 1))(embedded_question) # print(summed_across_words_question.shape) summed_across_words_question = Reshape((1, embedding_dim))(summed_across_words_question) # print(summed_across_words_question.shape) x = Dot(axes = 2)([summed_across_words_story, summed_across_words_question]) # print(x.shape) x = Reshape((ss_story_maxsents,))(x) # print(x.shape) x = Activation('softmax')(x) sent_weights = Reshape((ss_story_maxsents, 1))(x) # print(sent_weights.shape) x = Dot(axes = 1)([sent_weights, summed_across_words_story]) # print(x.shape) x = Reshape((embedding_dim,))(x) # print(x.shape) out = Dense(ss_vocab_size, activation = 'softmax')(x) # print(out.shape) single_model = Model([input_story, input_question], out) single_model.compile(optimizer = RMSprop(lr = 1e-3), loss = 'sparse_categorical_crossentropy', metrics = ['accuracy']) single_model.fit([ss_stories_train, ss_questions_train], ss_answers_train, \ epochs = num_epochs, batch_size = batch_size, validation_data = ([ss_stories_test, ss_questions_test], ss_answers_test)) single_debug_model = Model([input_story, input_question], sent_weights) return single_model, single_debug_model
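# A hedged usage sketch for the debug model returned above: inspect which sentence of a
# test story receives the most attention when answering its question. It assumes
# singleModel(...) has been called and that its return values and the test arrays are in
# scope under the same names; index 0 is just an example.
story = ss_stories_test[:1]
question = ss_questions_test[:1]
weights = single_debug_model.predict([story, question])  # shape (1, ss_story_maxsents, 1)
print("most attended sentence index:", weights[0, :, 0].argmax())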
def RESL_retrain(user, N, model_name): users = Interactions.query.with_entities(Interactions.userid).all() users = [int(u) for u, in users] movies = Interactions.query.with_entities(Interactions.movieid).all() movies = [int(m) for m, in movies] ratings = Interactions.query.with_entities(Interactions.rating).all() ratings = [r for r, in ratings] ratings_train = {'userid':users,'movieid':movies,'rating':ratings} ratings_train = pd.DataFrame(ratings_train) users = Validations.query.with_entities(Validations.userid).all() users = [int(u) for u, in users] movies = Validations.query.with_entities(Validations.movieid).all() movies = [int(m) for m, in movies] ratings = Validations.query.with_entities(Validations.rating).all() ratings = [r for r, in ratings] ratings_test = {'userid':users,'movieid':movies,'rating':ratings} ratings_test = pd.DataFrame(ratings_test) Kb.clear_session() K = 10 N = 100 M = 500 #Matrix Factorization Branch mu = ratings_train.rating.mean() epochs=40 reg = 0.15 u = Input(shape = (1,)) m = Input(shape = (1,)) u_embedding = Embedding(N,K,embeddings_regularizer = l2(reg))(u) m_embedding = Embedding(M,K,embeddings_regularizer = l2(reg))(m) u_bias = Embedding(N,1,embeddings_regularizer = l2(reg))(u) m_bias = Embedding(M,1,embeddings_regularizer = l2(reg))(m) x = Dot(axes = 2)([u_embedding,m_embedding]) x = Add()([x, u_bias, m_bias]) x = Flatten()(x) #ANN Branch reg = 0.2 u_embedding = Embedding(N,K,embeddings_regularizer = l2(reg))(u) m_embedding = Embedding(M,K,embeddings_regularizer = l2(reg))(m) u_embedding = Flatten()(u_embedding)#NxK m_embedding = Flatten()(m_embedding)#MxK y = Concatenate()([u_embedding, m_embedding])#Nx2K y = Dense(1000, activation = 'relu')(y) y = Dense(1)(y) #Residual Learning x = Add()([x, y]) model = Model(inputs=[u, m], outputs=x) model.compile( loss = 'mse', optimizer = 'adam', metrics = ['mse'] ) r = model.fit( x = [ratings_train.userid.values, ratings_train.movieid.values], y = ratings_train.rating.values - mu, epochs = epochs, batch_size = 64, validation_data = ( [ratings_test.userid.values, ratings_test.movieid.values], ratings_test.rating.values - mu ), shuffle = False ) model.save(os.path.join(os.path.dirname(__file__), model_name))
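# A hedged inference note for the model trained above: it is fit on ratings minus the
# global mean mu, so mu has to be added back when producing a rating. A minimal sketch,
# assuming it runs inside RESL_retrain (where model and mu are in scope) and that numpy
# is imported as np; the movie id is illustrative.
sample_pred = model.predict([np.array([[int(user)]]), np.array([[42]])]) + mu
print(float(sample_pred[0, 0]))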
import numpy as np latent_dim = 5 num_movies = 100 num_users = 100 if __name__ == '__main__': movie_input = Input(shape=[1], name='movie-input') movie_embedding = Embedding(num_movies + 1, latent_dim, name='movie-embedding', embeddings_constraint=non_neg())(movie_input) movie_vec = Flatten(name='movie-flatten')(movie_embedding) user_input = Input(shape=[1], name='user-input') user_embedding = Embedding(num_users + 1, latent_dim, name='user-embedding', embeddings_constraint=non_neg())(user_input) user_vec = Flatten(name='user-flatten')(user_embedding) dot = Dot(axes=1, name='dot-product')([movie_vec, user_vec]) model = Model(inputs=[user_input, movie_input], outputs=dot) model.compile('adam', 'mean_squared_error') plot_model(model, to_file='model.png', show_shapes=True, show_layer_names=True) rand_users = np.random.randint(1, 100, size=(10, 1)) rand_movies = np.random.randint(1, 100, size=(10, 1)) print(model.predict([rand_users, rand_movies]))
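    # A hedged follow-up: the learned non-negative factors can be read back from the
    # embedding layers by name, which is usually the point of this kind of model.
    movie_factors = model.get_layer('movie-embedding').get_weights()[0]  # (num_movies + 1, latent_dim)
    user_factors = model.get_layer('user-embedding').get_weights()[0]    # (num_users + 1, latent_dim)
    print(movie_factors.shape, user_factors.shape)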
encoder_outputs = encoder(x) # Set up the decoder ( not so simple step) decoder_inputs_placeholder = Input(shape=(max_len_target, )) # this word embedding will not use pre-trained vectors # although you could decoder_embedding = Embedding(num_words_output, EMBEDDING_DIM) decoder_inputs_x = decoder_embedding(decoder_inputs_placeholder) #Attention attn_repeat_layer = RepeatVector(max_len_input) attn_concat_layer = Concatenate(axis=-1) attn_dense1 = Dense(10, activation='tanh') attn_dense2 = Dense(1, activation=softmax_over_time) attn_dot = Dot(axes=1) # to perform the weighted sum of alpha[t] * h[t] def one_step_attention(h, st_1): # h = h(1), ..., h(Tx), shape = (Tx, LATENT_DIM * 2) # st_1 = s(t-1), shape = (LATENT_DIM_DECODER,) # copy s(t-1) Tx times # now shape = (Tx, LATENT_DIM_DECODER) st_1 = attn_repeat_layer(st_1) # Concatenate all h(t)'s with s(t-1) # Now of shape (Tx, LATENT_DIM_DECODER + LATENT_DIM * 2) x = attn_concat_layer([h, st_1]) # Neural net first layer
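    # A hedged sketch of how this attention step usually finishes, using the shared
    # layers declared above (attn_dense1, attn_dense2 with softmax_over_time, attn_dot):
    x = attn_dense1(x)               # small hidden layer of the attention net, shape (Tx, 10)
    alphas = attn_dense2(x)          # (Tx, 1) attention weights, softmax over the time axis
    context = attn_dot([alphas, h])  # weighted sum of encoder states, shape (1, LATENT_DIM * 2)
    return context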
def build(self): def xor_match(x): t1 = x[0] t2 = x[1] t1_shape = t1.get_shape() t2_shape = t2.get_shape() t1_expand = K.tf.stack([t1] * t2_shape[1], 2) t2_expand = K.tf.stack([t2] * t1_shape[1], 1) out_bool = K.tf.equal(t1_expand, t2_expand) out = K.tf.cast(out_bool, K.tf.float32) out = K.tf.expand_dims(out, 3) return out def gaussian_kernel_match(x): t1 = x[0] t2 = x[1] t1_shape = t1.get_shape() t2_shape = t2.get_shape() t1_expand = K.tf.stack([t1] * t2_shape[1], 2) t2_expand = K.tf.stack([t2] * t1_shape[1], 1) t_diff = K.tf.subtract(t1_expand, t2_expand) t_diff_norm = K.tf.norm( t_diff, ord='euclidean', axis=3) # L2-norm when input is a matrix and axis>0 out = K.tf.exp(K.tf.negative(K.tf.square(t_diff_norm))) # out = K.tf.expand_dims(out, 3) return out query = Input(name='query', shape=(self.config['text1_maxlen'], )) show_layer_info('Input', query) doc = Input(name='doc', shape=(self.config['text2_maxlen'], )) show_layer_info('Input', doc) # dpool_index = Input(name='dpool_index', shape=[self.config['text1_maxlen'], self.config['text2_maxlen'], 3], dtype='int32') # show_layer_info('Input', dpool_index) if self.config['similarity'] in ['dot', 'cosine', 'gaussian']: embedding = Embedding(self.config['vocab_size'], self.config['embed_size'], weights=[self.config['embed']], trainable=self.embed_trainable) q_embed = embedding(query) show_layer_info('Embedding', q_embed) d_embed = embedding(doc) show_layer_info('Embedding', d_embed) if self.config['similarity'] == 'dot': cross = Dot(axes=[2, 2], normalize=False)([q_embed, d_embed]) show_layer_info('Dot', cross) if self.config['similarity'] == 'cosine': cross = Dot(axes=[2, 2], normalize=True)([q_embed, d_embed]) show_layer_info('Cosine', cross) if self.config['similarity'] == 'indicator': cross = Lambda(xor_match)([query, doc]) show_layer_info('Indicator', cross) if self.config['similarity'] == 'gaussian': cross = Lambda(gaussian_kernel_match)([q_embed, d_embed]) show_layer_info('Gaussian', cross) cross_reshape = Reshape((self.config['text1_maxlen'], self.config['text2_maxlen'], 1))(cross) show_layer_info('Reshape', cross_reshape) conv2d = Conv2D(self.config['kernel_count'], self.config['kernel_size'], padding='same', activation='relu') # dpool = DynamicMaxPooling(self.config['dpool_size'][0], self.config['dpool_size'][1]) maxpool = MaxPooling2D(pool_size=(self.config['dpool_size'][0], self.config['dpool_size'][1]), padding="valid") conv1 = conv2d(cross_reshape) show_layer_info('Conv2D', conv1) # pool1 = dpool([conv1, dpool_index]) # show_layer_info('DynamicMaxPooling', pool1) pool1 = maxpool(conv1) show_layer_info('MaxPooling2D', pool1) pool1_flat = Flatten()(pool1) show_layer_info('Flatten', pool1_flat) pool1_flat_drop = Dropout(rate=self.config['dropout_rate'])(pool1_flat) show_layer_info('Dropout', pool1_flat_drop) dense1 = Dense(128, activation='relu')(pool1_flat_drop) show_layer_info('Dense', dense1) # dense2 = Dense(128, activation='relu')(dense1) # show_layer_info('Dense', dense2) if self.config['target_mode'] == 'classification': out_ = Dense(2, activation='softmax')(pool1_flat_drop) elif self.config['target_mode'] in ['regression', 'ranking']: # out_ = Dense(1)(pool1_flat_drop) out_ = Dense(1)(dense1) show_layer_info('Dense', out_) # model = Model(inputs=[query, doc, dpool_index], outputs=out_) model = Model(inputs=[query, doc], outputs=out_) return model
def create_model(self, pool_size_l=3, pool_size_q=3, optimizer='adam', metric='cos', lr=0.001): question = Input(shape=(self._max_seq_len,)) pos_label = Input(shape=(self._max_label_words_len,)) neg_label = Input(shape=(self._max_label_words_len,)) all_label = Input(shape=(None, self._max_label_words_len,)) all_label_rshp = Lambda( lambda x: K.reshape(x, shape=(K.shape(all_label)[0] * K.shape(all_label)[1], self._max_label_words_len)))( all_label) shared_emb = Embedding(self._nb_words, self._embed_dim, weights=[self._embedding_matrix], mask_zero=False, trainable=True) shared_label_encoder = AveragePooling1D(pool_size=pool_size_l, padding='same') # GlobalMaxPooling1D()# shared_question_encoder = AveragePooling1D(pool_size=pool_size_q, padding='same') # define metric layer if metric is mlp if metric == 'mlp': metric_layer_1 = Dense(units=128, activation='softplus') metric_layer_2 = Dense(units=64, activation='softplus') metric_layer_3 = Dense(units=1, activation='softplus') question_emb = shared_emb(question) pos_label_emb = shared_emb(pos_label) neg_label_emb = shared_emb(neg_label) all_label_emb = shared_emb(all_label_rshp) enc_question = shared_question_encoder(question_emb) enc_pos_label = shared_label_encoder(pos_label_emb) enc_neg_label = shared_label_encoder(neg_label_emb) enc_all_label = shared_label_encoder(all_label_emb) enc_question = GlobalMaxPooling1D()(enc_question) enc_pos_label = GlobalMaxPooling1D()(enc_pos_label) enc_neg_label = GlobalMaxPooling1D()(enc_neg_label) enc_all_label = GlobalMaxPooling1D()(enc_all_label) enc_all_label_rshp = Lambda( lambda x: K.reshape(x, shape=(K.shape(all_label)[0], K.shape(all_label)[1], self._embed_dim)))( enc_all_label) if metric == 'cos': distance_pos = Dot(axes=1, normalize=True)([enc_question, enc_pos_label]) distance_neg = Dot(axes=1, normalize=True)([enc_question, enc_neg_label]) distance_all = CosineSim()([enc_question, enc_all_label_rshp]) elif metric == 'mlp': distance_pos = Concatenate(axis=-1)([enc_question, enc_pos_label]) distance_pos = metric_layer_1(distance_pos) distance_pos = metric_layer_2(distance_pos) distance_pos = metric_layer_3(distance_pos) distance_neg = Concatenate(axis=-1)([enc_question, enc_neg_label]) distance_neg = metric_layer_1(distance_neg) distance_neg = metric_layer_2(distance_neg) distance_neg = metric_layer_3(distance_neg) enc_question_rshp = Lambda(lambda x: K.repeat(x, K.shape(all_label)[1]))(enc_question) enc_l_q_conc = Concatenate(axis=-1)([enc_question_rshp, enc_all_label_rshp]) enc_l_q_conc_rshp = Lambda( lambda x: K.reshape(x, shape=(K.shape(all_label)[0] * K.shape(all_label)[1], 2 * self._embed_dim)))( enc_l_q_conc) distance_all = metric_layer_1(enc_l_q_conc_rshp) distance_all = metric_layer_2(distance_all) distance_all = metric_layer_3(distance_all) distance_all = Lambda(lambda x: K.reshape(x, shape=(K.shape(all_label)[0], K.shape(all_label)[1])))( distance_all) ranking_loss = losses.hinge.hinge_loss(distance_pos, distance_neg) acc = sim.accuracy(distance_pos, distance_neg) model = Model([question, pos_label, neg_label, all_label], distance_all) adam = keras.optimizers.Adam(lr=lr) model.compile(loss=ranking_loss, optimizer=adam, metrics=[acc]) self.model = model
def DateConvert(DATELIST):
    m = 10000
    dataset, human_vocab, machine_vocab, inv_machine_vocab = load_dataset(m)
    Tx = 30
    Ty = 10
    X, Y, Xoh, Yoh = preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty)
    # - `X`: a processed version of the human readable dates in the training set, where each character
    #   is replaced by an index mapped to the character via `human_vocab`. Each date is further padded
    #   to $T_x$ values with a special character (< pad >). `X.shape = (m, Tx)`
    # - `Y`: a processed version of the machine readable dates in the training set, where each character
    #   is replaced by the index it is mapped to in `machine_vocab`. You should have `Y.shape = (m, Ty)`.
    # - `Xoh`: one-hot version of `X`; the "1" entry's index is mapped to the character thanks to
    #   `human_vocab`. `Xoh.shape = (m, Tx, len(human_vocab))`
    # - `Yoh`: one-hot version of `Y`; the "1" entry's index is mapped to the character thanks to
    #   `machine_vocab`. `Yoh.shape = (m, Ty, len(machine_vocab))`. Here, `len(machine_vocab) = 11`
    #   since there are 11 characters ('-' as well as 0-9).

    # Define shared layers as global variables
    repeator = RepeatVector(Tx)
    concatenator = Concatenate(axis=-1)
    densor = Dense(1, activation="relu")
    activator = Activation(softmax, name='attention_weights')  # custom softmax(axis=1) loaded in this notebook
    dotor = Dot(axes=1)

    def one_step_attention(a, s_prev):
        """
        Performs one step of attention: outputs a context vector computed as a dot product of the
        attention weights "alphas" and the hidden states "a" of the Bi-LSTM.

        Arguments:
        a -- hidden state output of the Bi-LSTM, numpy-array of shape (m, Tx, 2*n_a)
        s_prev -- previous hidden state of the (post-attention) LSTM, numpy-array of shape (m, n_s)

        Returns:
        context -- context vector, input of the next (post-attention) LSTM cell
        """
        s_prev = repeator(s_prev)
        concat = concatenator([a, s_prev])
        e = densor(concat)
        # Use activator and e to compute the attention weights "alphas" (≈ 1 line)
        alphas = activator(e)
        # Use dotor together with "alphas" and "a" to compute the context vector to be given to the
        # next (post-attention) LSTM cell (≈ 1 line)
        context = dotor([alphas, a])
        return context

    n_a = 64
    n_s = 128
    post_activation_LSTM_cell = LSTM(n_s, return_state=True)
    output_layer = Dense(len(machine_vocab), activation=softmax)

    def model(Tx, Ty, n_a, n_s, human_vocab_size, machine_vocab_size):
        """
        Arguments:
        Tx -- length of the input sequence
        Ty -- length of the output sequence
        n_a -- hidden state size of the Bi-LSTM
        n_s -- hidden state size of the post-attention LSTM
        human_vocab_size -- size of the python dictionary "human_vocab"
        machine_vocab_size -- size of the python dictionary "machine_vocab"

        Returns:
        model -- Keras model instance
        """
        # Define the inputs of your model with a shape (Tx,)
        # Define s0 and c0, initial hidden state for the decoder LSTM of shape (n_s,)
        X = Input(shape=(Tx, human_vocab_size))
        s0 = Input(shape=(n_s,), name='s0')
        c0 = Input(shape=(n_s,), name='c0')
        s = s0
        c = c0

        # Initialize empty list of outputs
        outputs = []

        # Step 1: Define your pre-attention Bi-LSTM. Remember to use return_sequences=True. (≈ 1 line)
        a = Bidirectional(LSTM(n_a, return_sequences=True))(X)

        # Step 2: Iterate for Ty steps
        for t in range(Ty):
            # Step 2.A: Perform one step of the attention mechanism to get back the context vector at step t (≈ 1 line)
            context = one_step_attention(a, s)
            # Step 2.B: Apply the post-attention LSTM cell to the "context" vector.
            # Don't forget to pass: initial_state = [hidden state, cell state] (≈ 1 line)
            s, _, c = post_activation_LSTM_cell(context, initial_state=[s, c])
            # Step 2.C: Apply Dense layer to the hidden state output of the post-attention LSTM (≈ 1 line)
            out = output_layer(s)
            # Step 2.D: Append "out" to the "outputs" list (≈ 1 line)
            outputs.append(out)

        # Step 3: Create model instance taking three inputs and returning the list of outputs. (≈ 1 line)
        model = Model(inputs=[X, s0, c0], outputs=outputs)
        return model

    model = model(Tx, Ty, n_a, n_s, len(human_vocab), len(machine_vocab))
    # model.summary()
    model.compile(optimizer=Adam(lr=0.005, beta_1=0.9, beta_2=0.999, decay=0.01),
                  metrics=['accuracy'], loss='categorical_crossentropy')
    s0 = np.zeros((m, n_s))
    c0 = np.zeros((m, n_s))
    outputs = list(Yoh.swapaxes(0, 1))
    model.fit([Xoh, s0, c0], outputs, epochs=10, batch_size=100)
    # model.save('date_recognizer_model.h5')
    # model.save_weights('date_recognizer_weights.h5')

    # EXAMPLES = ['3 May 1979', '5 April 09', '21th of August 2016', 'Tue 10 Jul 2007', 'Saturday May 9 2018', 'March 3 2001', 'March 3rd 2001', '1 March 2001']
    # for example in EXAMPLES:
    outputDateList = []
    # Use single-example initial states for prediction: the training-time s0/c0 have batch size m,
    # which would not match a single one-hot encoded date.
    s_init = np.zeros((1, n_s))
    c_init = np.zeros((1, n_s))
    for example in DATELIST:
        source = string_to_int(example, Tx, human_vocab)
        source = np.array(list(map(lambda x: to_categorical(x, num_classes=len(human_vocab)), source)), ndmin=3)
        prediction = model.predict([source, s_init, c_init])
        prediction = np.argmax(prediction, axis=-1)
        output = [inv_machine_vocab[int(i)] for i in prediction]
        print("source:", example)
        print("output:", ''.join(output))
        output = ''.join(output)
        outputDateList.append(output)
    return outputDateList
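# A possible call to DateConvert, assuming the nmt_utils helpers it uses (load_dataset,
# preprocess_data, string_to_int, softmax, to_categorical) are importable. Note that the function
# retrains the attention model for 10 epochs on every call, so the first conversion is slow:
dates_out = DateConvert(['3 May 1979', 'Saturday May 9 2018'])
print(dates_out)  # ideally something like ['1979-05-03', '2018-05-09'], depending on convergence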
image_conv4 = Conv2D(32, (3, 3), activation='relu', padding='same', strides=2)(image_conv3)
image_conv5 = Conv2D(21, (3, 3), activation='relu', padding='same', strides=2)(image_conv4)
image_encoder = Reshape((256, 21))(image_conv5)

# decoder
language_input = Input(shape=(max_length,))
padding_input = Input(shape=(2, 21,))
language_model = Embedding(vocab_size, 21, input_length=max_length, mask_zero=False)(language_input)
padding_language = concatenate([padding_input, language_model], axis=1)

# 1st block (the gating pattern is sketched standalone right after this snippet)
decoder_conv = Conv1D(21, 3, padding='valid')(padding_language)
decoder_gate = Conv1D(21, 3, padding='valid', activation='sigmoid')(padding_language)
decoder_glu = Multiply()([decoder_conv, decoder_gate])
decoder_1 = Add()([language_model, decoder_glu])

# attention
attention_matrix = Dot(axes=2)([decoder_1, image_encoder])
attention_softmax = Activation('softmax')(attention_matrix)
# NOTE: Reshape((21, 256)) only relabels the axes of the (256, 21) feature map; it does not
# transpose the values. A Permute((2, 1)) layer would give a true transpose, but the pretrained
# weights loaded below were trained with this Reshape variant.
image_encoderTrans = Reshape((21, 256))(image_encoder)
decoder_c = Dot(axes=2)([attention_softmax, image_encoderTrans])
decoder_2 = Add()([decoder_1, decoder_c])
decoder_softmax = Dense(21, activation='softmax')(decoder_2)

# model
my_model = Model(inputs=[image_input, language_input, padding_input], outputs=decoder_softmax)
my_model.load_weights("/rap_blues/lunwen/convs2s/358_complete/org-weights-epoch-200--val_loss-0.3155--loss-0.2762.hdf5")

# prediction
actual, predicted = list(), list()
for i in range(len(texts)):
    print(i)
    yhat = generate_desc(my_model, tokenizer, train_features[i], max_length)
    print('\n\nReal---->\n\n' + texts[i])
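# The "1st block" above is a gated linear unit in the ConvS2S style: one Conv1D provides the
# candidate features, a parallel sigmoid Conv1D acts as the gate, Multiply combines them, and
# Add supplies the residual connection. A standalone sketch of that gating pattern (toy sizes and
# 'same' padding here, rather than the left-padded 'valid' convolutions used above):
from tensorflow.keras.layers import Input, Conv1D, Multiply, Add

seq = Input(shape=(12, 21))                                        # (timesteps, channels)
candidate = Conv1D(21, 3, padding='same')(seq)                     # linear path
gate = Conv1D(21, 3, padding='same', activation='sigmoid')(seq)    # gating path in [0, 1]
glu = Multiply()([candidate, gate])                                # elementwise gating
block_out = Add()([seq, glu])                                      # residual connection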
def main(save=True, save_dir=os.getcwd(), load_model=None):
    """
    Arguments:
        save: save the model to `save_dir` after training
        save_dir: directory to save trained models to; defaults to the current working directory
        load_model: path to a pretrained weights file to load when save=False

    Returns:
        the trained/loaded model, the model parameters, and the vocabulary
    """
    global model, params, vocab
    params1 = {
        'm': 1000,
        'n_a': 32,
        'n_s': 64,
        'Tx': 30,  # max length of the input
        'Ty': 10   # output length, "YYYY-MM-DD"
    }
    dataset, human_vocab, machine_vocab, inv_machine_vocab = nmt_utils.load_dataset(params1['m'])
    X, Y, Xoh, Yoh = nmt_utils.preprocess_data(dataset, human_vocab, machine_vocab,
                                               params1['Tx'], params1['Ty'])
    params2 = {
        'machine_vocab_size': len(machine_vocab),
        'human_vocab_size': len(human_vocab)
    }
    params = {**params1, **params2}
    hparams = {'lr': 0.005, 'beta_1': 0.9, 'beta_2': 0.999, 'decay': 0.01}
    vocab = {
        'human_vocab': human_vocab,
        'machine_vocab': machine_vocab,
        'inv_machine_vocab': inv_machine_vocab
    }

    # Define shared layers as global variables
    global repeator, concatenator, densor1, densor2, activator, dotor
    repeator = RepeatVector(params['Tx'])
    concatenator = Concatenate(axis=-1)
    densor1 = Dense(10, activation="tanh")
    densor2 = Dense(1, activation="relu")
    activator = Activation(nmt_utils.softmax, name='attention_weights')  # custom softmax(axis=1) loaded from nmt_utils
    dotor = Dot(axes=1)

    if save:
        model, _ = myModel(Xoh, Yoh, **params, **hparams)
        print("Saving weights...")
        model.save_weights(os.path.join(save_dir, 'date_model_epoch1.h5'))
        print("Weights saved!")
    else:
        if load_model is None:
            raise FileNotFoundError('Please provide a valid model path, including the model file name!')
        model = create_model(**params)
        print("Loading pretrained weights...")
        model.load_weights(load_model)
        print("%s weights loaded!" % load_model)
    return model, params, vocab
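# A sketch of both code paths of main(); the weights path in the second call is only an example
# and must point to an existing file:
model, params, vocab = main(save=True)                                               # train and save
model, params, vocab = main(save=False, load_model='models/date_model_epoch15.h5')   # rebuild and load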
image_conv5 = Conv2D(21, (3, 3), activation='relu', padding='same', strides=2)(image_conv4)
image_encoder = Reshape((256, 21))(image_conv5)
# NOTE: as in the first decoder snippet, Reshape((21, 256)) relabels the axes rather than
# transposing the feature map; Permute((2, 1)) would be a true transpose (contrasted in the
# sketch below).
image_encoderTrans = Reshape((21, 256))(image_encoder)

# decoder
language_input = Input(shape=(max_length,))
padding_input = Input(shape=(2, 21,))
language_model = Embedding(vocab_size, 21, input_length=max_length, mask_zero=False)(language_input)

# 1st block
padding_language = concatenate([padding_input, language_model], axis=1)
decoder_conv1 = Conv1D(21, 3, padding='valid')(padding_language)
decoder_gate1 = Conv1D(21, 3, padding='valid', activation='sigmoid')(padding_language)
decoder_glu1 = Multiply()([decoder_conv1, decoder_gate1])
decoder_1 = Add()([language_model, decoder_glu1])

# 1st attention
attention_matrix1 = Dot(axes=2)([decoder_1, image_encoder])
attention_softmax1 = Activation('softmax')(attention_matrix1)
decoder_c1 = Dot(axes=2)([attention_softmax1, image_encoderTrans])
decoder_2i = Add()([decoder_1, decoder_c1])

# 2nd block
decoder_2o = concatenate([padding_input, decoder_2i], axis=1)
decoder_conv2 = Conv1D(21, 3, padding='valid')(decoder_2o)
decoder_gate2 = Conv1D(21, 3, padding='valid', activation='sigmoid')(decoder_2o)
decoder_glu2 = Multiply()([decoder_conv2, decoder_gate2])
decoder_2 = Add()([decoder_2i, decoder_glu2])

# 2nd attention
attention_matrix2 = Dot(axes=2)([decoder_2, image_encoder])
attention_softmax2 = Activation('softmax')(attention_matrix2)
decoder_c2 = Dot(axes=2)([attention_softmax2, image_encoderTrans])
decoder_3i = Add()([decoder_2, decoder_c2])

# 3rd block
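# A small standalone check contrasting Reshape with Permute on the encoder output, as flagged in
# the NOTE above: Reshape((21, 256)) only relabels the axes of the (256, 21) feature map, while
# Permute((2, 1)) produces a genuine transpose.
import numpy as np
from tensorflow.keras.layers import Input, Permute, Reshape
from tensorflow.keras.models import Model

inp = Input(shape=(256, 21))
reshaped = Model(inp, Reshape((21, 256))(inp))
permuted = Model(inp, Permute((2, 1))(inp))

x = np.arange(256 * 21, dtype='float32').reshape(1, 256, 21)
print(np.allclose(reshaped.predict(x)[0], x[0].T))   # False: positions and channels get scrambled
print(np.allclose(permuted.predict(x)[0], x[0].T))   # True: a true transpose of the feature map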
# * [Dot()](https://keras.io/layers/merge/#dot) # ```Python # dot_product = dot_layer([var1,var2]) # ``` # In[56]: # Defined shared layers as global variables repeator = RepeatVector(Tx) concatenator = Concatenate(axis=-1) densor1 = Dense(10, activation="tanh") densor2 = Dense(1, activation="relu") activator = Activation( softmax, name='attention_weights' ) # We are using a custom softmax(axis = 1) loaded in this notebook dotor = Dot(axes=1) # In[57]: # GRADED FUNCTION: one_step_attention def one_step_attention(a, s_prev): """ Performs one step of attention: Outputs a context vector computed as a dot product of the attention weights "alphas" and the hidden states "a" of the Bi-LSTM. Arguments: a -- hidden state output of the Bi-LSTM, numpy-array of shape (m, Tx, 2*n_a) s_prev -- previous hidden state of the (post-attention) LSTM, numpy-array of shape (m, n_s)
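# A quick shape check (toy sizes, not the notebook's Tx and n_a) of the dotor = Dot(axes=1)
# contraction used by one_step_attention: attention weights of shape (m, Tx, 1) and Bi-LSTM
# states of shape (m, Tx, 2*n_a) combine into a single context vector of shape (m, 1, 2*n_a).
from tensorflow.keras.layers import Input, Dot
from tensorflow.keras.models import Model

alphas_in = Input(shape=(30, 1))     # stands in for the attention weights "alphas"
a_in = Input(shape=(30, 64))         # stands in for the Bi-LSTM hidden states "a"
context = Dot(axes=1)([alphas_in, a_in])
print(Model([alphas_in, a_in], context).output_shape)   # (None, 1, 64)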
m = Input(shape=(1, )) u_embedding = Embedding(N, K, embeddings_regularizer=l2(reg))(u) # (N, 1, K) m_embedding = Embedding(M, K, embeddings_regularizer=l2(reg))(m) # (N, 1, K) # subsubmodel = Model([u, m], [u_embedding, m_embedding]) # user_ids = df_train.userId.values[0:5] # movie_ids = df_train.movie_idx.values[0:5] # print("user_ids.shape", user_ids.shape) # p = subsubmodel.predict([user_ids, movie_ids]) # print("p[0].shape:", p[0].shape) # print("p[1].shape:", p[1].shape) # print(p) u_bias = Embedding(N, 1, embeddings_regularizer=l2(reg))(u) # (N, 1, 1) m_bias = Embedding(M, 1, embeddings_regularizer=l2(reg))(m) # (N, 1, 1) x = Dot(axes=2)([u_embedding, m_embedding]) # (N, 1, 1) # submodel = Model([u, m], x) # user_ids = df_train.userId.values[0:5] # movie_ids = df_train.movie_idx.values[0:5] # p = submodel.predict([user_ids, movie_ids]) # print("p.shape:", p.shape) # exit() x = Add()([x, u_bias, m_bias]) x = Flatten()(x) # (N, 1) x = BatchNormalization()(x) x = Dropout(0.1)(x) x = Dense(64, activation='relu')(x) x = BatchNormalization()(x)