def make_embedding(vocab_size, wv_size, init=None, fixed=False, constraint=ConstNorm(3.0, True), **kwargs):
    '''
    Takes parameters and makes a word vector embedding

    Args:
    ------
        vocab_size: integer -- how many words are in your vocabulary

        wv_size: integer -- how large you want the word vectors

        init: initial word vectors -- defaults to None. If you specify initial
            word vectors, they need to be an np.array of shape (vocab_size, wv_size)

        fixed: boolean -- whether the word vectors should be kept fixed (non-trainable)

    Returns:
    --------
        a Keras Embedding layer
    '''
    if (init is not None) and len(init.shape) == 2:
        # Keras needs a list for the initial weights
        emb = Embedding(vocab_size, wv_size, weights=[init], W_constraint=constraint)
    else:
        emb = Embedding(vocab_size, wv_size, W_constraint=constraint)

    if fixed:
        # Freeze the layer so the vectors are not updated during training
        emb.trainable = False
        # emb.params = []

    return emb
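# Usage sketch (illustrative only): assumes ConstNorm and the Keras 1-style
# Embedding(..., W_constraint=...) signature used above are available in this codebase,
# and uses a random matrix as a stand-in for real pretrained vectors.
import numpy as np

pretrained = np.random.randn(10000, 100).astype('float32')  # hypothetical 10k x 100 vectors

emb_trainable = make_embedding(10000, 100, init=pretrained)          # fine-tuned during training
emb_fixed = make_embedding(10000, 100, init=pretrained, fixed=True)  # vectors kept frozen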
def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """
    Creates a Keras Embedding() layer and loads in pre-trained GloVe 50-dimensional vectors.

    Arguments:
    word_to_vec_map -- dictionary mapping words to their GloVe vector representation.
    word_to_index -- dictionary mapping from words to their indices in the vocabulary (400,001 words)

    Returns:
    embedding_layer -- pretrained layer Keras instance
    """
    vocab_len = len(word_to_index) + 1  # adding 1 to fit Keras embedding (requirement)
    emb_dim = word_to_vec_map["cucumber"].shape[0]  # dimensionality of your GloVe word vectors (= 50)

    # Initialize the embedding matrix as a numpy array of zeros of shape (vocab_len, emb_dim)
    emb_matrix = np.zeros((vocab_len, emb_dim))

    # Set each row "index" of the embedding matrix to the word vector of the "index"-th word of the vocabulary
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]

    # Define the Keras embedding layer with the correct input/output sizes and make it non-trainable
    embedding_layer = Embedding(vocab_len, emb_dim)
    embedding_layer.trainable = False

    # Build the embedding layer; this is required before setting its weights. Do not modify the "None".
    embedding_layer.build((None,))

    # Set the weights of the embedding layer to the embedding matrix. Your layer is now pretrained.
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer
def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """
    Creates a Keras Embedding() layer and loads in pre-trained GloVe 50-dimensional vectors.

    Arguments:
    word_to_vec_map -- dictionary mapping words to their GloVe vector representation.
    word_to_index -- dictionary mapping from words to their indices in the vocabulary (400,001 words)

    Returns:
    embedding_layer -- pretrained layer Keras instance
    """
    vocab_len = len(word_to_index) + 1
    emb_dim = word_to_vec_map['cucumber'].shape[0]

    emb_matrix = np.zeros((vocab_len, emb_dim))
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]

    embedding_layer = Embedding(vocab_len, emb_dim)
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])
    embedding_layer.trainable = False

    return embedding_layer
def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """
    Creates a Keras Embedding() layer and loads in pre-trained GloVe 50-dimensional vectors.

    Arguments:
    word_to_vec_map -- dictionary mapping words to their GloVe vector representation.
    word_to_index -- dictionary mapping from words to their indices in the vocabulary (400,001 words)

    Returns:
    embedding_layer -- pretrained layer Keras instance
    """
    vocab_len = len(word_to_index) + 1  # adding 1 to fit Keras embedding (requirement)
    emb_dim = word_to_vec_map["cucumber"].shape[0]  # dimensionality of your GloVe word vectors (= 50)

    ### START CODE HERE ###
    # Step 1
    # Initialize the embedding matrix as a numpy array of zeros.
    # See instructions above to choose the correct shape.
    emb_matrix = np.zeros((vocab_len, emb_dim))

    # Step 2
    # Set each row "idx" of the embedding matrix to be
    # the word vector representation of the idx'th word of the vocabulary
    for word, idx in word_to_index.items():
        emb_matrix[idx, :] = word_to_vec_map[word]

    # Step 3
    # Define Keras embedding layer with the correct input and output sizes.
    # Make it non-trainable.
    embedding_layer = Embedding(input_dim=vocab_len, output_dim=emb_dim)
    embedding_layer.trainable = False
    ### END CODE HERE ###

    # Step 4 (already done for you; please do not modify)
    # Build the embedding layer; it is required before setting the weights of the embedding layer.
    embedding_layer.build((None,))  # Do not modify the "None". This line of code is complete as-is.

    # Set the weights of the embedding layer to the embedding matrix. Your layer is now pretrained.
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer
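# Usage sketch for pretrained_embedding_layer (illustrative only): a toy word_to_vec_map
# stands in for the real GloVe file, and it includes a "cucumber" entry because the
# function reads the vector dimensionality from that key.
import numpy as np

word_to_vec_map = {
    'cucumber': np.random.randn(50),
    'banana': np.random.randn(50),
    'apple': np.random.randn(50),
}
word_to_index = {'cucumber': 1, 'banana': 2, 'apple': 3}  # index 0 stays reserved

embedding_layer = pretrained_embedding_layer(word_to_vec_map, word_to_index)
print(embedding_layer.get_weights()[0].shape)  # (4, 50): vocab_len x emb_dim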
parser.add_argument('--tr-data', help='training data')
parser.add_argument('--max-seq-len', type=int, default=30, help='maximum sequence length')
args = parser.parse_args()

# Source of embeddings
wv_model = lwvlib.load(args.embeddings, args.edim, args.edim)
d = Data(wv_model)
with open(args.tr_data) as tr_f:
    class_indices, data_matrix = d.read(tr_f, args)
class_indices_1hot = keras.utils.to_categorical(class_indices)

inp_seq = Input(shape=(args.max_seq_len,), name="words", dtype='int32')
inp_embeddings = Embedding(*wv_model.vectors.shape,
                           input_length=args.max_seq_len,
                           mask_zero=False,
                           weights=[wv_model.vectors])
inp_embeddings.trainable = False
text_src = inp_embeddings(inp_seq)

#gru1_out=GRU(100,name="gru1")(text_src)
cnn1_out = Conv1D(100, 2, padding="same")(text_src)
pooled = Flatten()(MaxPooling1D(pool_size=args.max_seq_len, strides=None, padding='valid')(cnn1_out))
do = Dropout(0.3)(pooled)
dense1 = Dense(50, activation="relu")(do)
dense_out = Dense(np.max(class_indices) + 1, activation='softmax', name="dec")(dense1)

model = Model(inputs=[inp_seq], outputs=dense_out)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit([data_matrix], class_indices_1hot, batch_size=10, epochs=100, verbose=2, validation_split=0.2)
def build(self, learn_context_weights=True):
    content_forward = Input(shape=(None,), dtype='int32', name='content_forward')
    content_backward = Input(shape=(None,), dtype='int32', name='content_backward')
    context = Input(shape=(1,), dtype='int32', name='context')

    # Context-word embedding, optionally initialized from the word2vec output matrix and frozen
    if learn_context_weights:
        context_weights = None
    else:
        context_weights = [self.word2vec_model.syn1neg]
    context_embedding = Embedding(input_dim=len(self.iterator.word_index),
                                  output_dim=256,
                                  input_length=1,
                                  weights=context_weights)
    if not learn_context_weights:
        context_embedding.trainable = False
    context_flat = Flatten()(context_embedding(context))

    # Character-level embedding shared by the forward and backward content inputs
    char_embedding = Embedding(input_dim=29, output_dim=64, mask_zero=True)
    embed_forward = char_embedding(content_forward)
    embed_backward = char_embedding(content_backward)

    rnn_forward = LSTM(output_dim=256, return_sequences=True, activation='tanh')(embed_forward)
    backwards_lstm = LSTM(output_dim=256, return_sequences=True, activation='tanh', go_backwards=True)

    # Reverse the backward LSTM's output so it aligns timestep-by-timestep with the forward LSTM
    def reverse_tensor(inputs, mask):
        return inputs[:, ::-1, :]

    def reverse_tensor_shape(input_shapes):
        return input_shapes

    reverse = Lambda(reverse_tensor, output_shape=reverse_tensor_shape)
    reverse.supports_masking = True
    rnn_backward = reverse(backwards_lstm(embed_backward))

    rnn_bidi = TimeDistributed(Dense(output_dim=256))(merge([rnn_forward, rnn_backward], mode='concat'))

    attention_1 = TimeDistributed(Dense(output_dim=256, activation='tanh', bias=False))(rnn_bidi)
    attention_2 = TimeDistributed(Dense(output_dim=1, activity_regularizer='activity_l2', bias=False))(attention_1)

    # Attention-weighted sum over timesteps, ignoring masked (padded) positions
    def attn_merge(inputs, mask):
        vectors = inputs[0]
        logits = inputs[1]
        # Flatten the logits and take a softmax
        logits = K.squeeze(logits, axis=2)
        pre_softmax = K.switch(mask[0], logits, -numpy.inf)
        weights = K.expand_dims(K.softmax(pre_softmax))
        return K.sum(vectors * weights, axis=1)

    def attn_merge_shape(input_shapes):
        return (input_shapes[0][0], input_shapes[0][2])

    attn = Lambda(attn_merge, output_shape=attn_merge_shape)
    attn.supports_masking = True
    attn.compute_mask = lambda inputs, mask: None
    content_flat = attn([rnn_bidi, attention_2])

    # Score = dot product between the attended content vector and the context embedding
    output = Activation('sigmoid', name='output')(merge([content_flat, context_flat], mode='dot', dot_axes=(1, 1)))

    model = Model(input=[content_forward, content_backward, context], output=output)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    inputs = [content_forward, content_backward]
    self._predict = K.function(inputs, content_flat)
    self._attention = K.function(inputs, K.squeeze(attention_2, axis=2))
    self.model = model
def model(data, units, model_flag='avg', reg=0.00, dropout_rate=0.5, threshold=1, num_layers=1, filter_nums=None):
    '''set up'''
    reduced_embedding_matrix = data.reduced_embedding_matrix
    maxlen_explain = data.lengths.maxlen_exp
    maxlen_question = data.lengths.maxlen_question

    if model_flag == 'baseline':
        pass
    elif model_flag == 'avg':
        model_flag = model_flag + str(num_layers)
    elif model_flag == 'cnn':
        if filter_nums is None:
            filter_nums = [10, 10, 10, 10, 10, 10, 10]
    else:
        raise ValueError('invalid model_flag.')

    '''define some layers'''
    RNN = get_rnn(model_flag=model_flag, units=units, num_layers=num_layers,
                  dropout=dropout_rate, reg=reg, filter_nums=filter_nums)
    Cosine_similarity = Lambda(cosine_similarity, name='Cosine_similarity')
    Hinge_loss = Lambda(lambda inputs: hinge_loss(inputs, threshold), name='loss')
    Glove_embedding = Embedding(input_dim=reduced_embedding_matrix.shape[0],
                                output_dim=reduced_embedding_matrix.shape[1],
                                weights=[reduced_embedding_matrix],
                                name='glove_embedding')
    Glove_embedding.trainable = False

    '''define model'''
    e_input = Input((maxlen_explain,), name='explanation')
    q_input = Input((maxlen_question,), name='question')
    e = Glove_embedding(e_input)
    q = Glove_embedding(q_input)
    e = Dropout(0.5)(e)
    q = Dropout(0.5)(q)
    eq = Concatenate(axis=1)([e, q])
    eq = RNN(eq)

    pos_ans_input = Input((23,))
    neg_ans1_input = Input((23,))
    neg_ans2_input = Input((23,))
    neg_ans3_input = Input((23,))

    pos_ans = Glove_embedding(pos_ans_input)
    neg_ans1 = Glove_embedding(neg_ans1_input)
    neg_ans2 = Glove_embedding(neg_ans2_input)
    neg_ans3 = Glove_embedding(neg_ans3_input)

    pos_ans = Dropout(0.5)(pos_ans)
    neg_ans1 = Dropout(0.5)(neg_ans1)
    neg_ans2 = Dropout(0.5)(neg_ans2)
    neg_ans3 = Dropout(0.5)(neg_ans3)

    pos_ans = RNN(pos_ans)
    neg_ans1 = RNN(neg_ans1)
    neg_ans2 = RNN(neg_ans2)
    neg_ans3 = RNN(neg_ans3)

    pos_similarity = Cosine_similarity([eq, pos_ans])
    neg_similarity1 = Cosine_similarity([eq, neg_ans1])
    neg_similarity2 = Cosine_similarity([eq, neg_ans2])
    neg_similarity3 = Cosine_similarity([eq, neg_ans3])

    loss = Hinge_loss([pos_similarity, neg_similarity1])
    predictions = Concatenate(axis=-1, name='prediction')(
        [pos_similarity, neg_similarity1, neg_similarity2, neg_similarity3])

    '''define training_model and prediction_model'''
    training_model = Model(inputs=[e_input, q_input, pos_ans_input, neg_ans1_input],
                           outputs=loss)
    Wsave = training_model.get_weights()
    prediction_model = Model(inputs=[e_input, q_input, pos_ans_input, neg_ans1_input,
                                     neg_ans2_input, neg_ans3_input],
                             outputs=predictions)
    return training_model, prediction_model, Wsave, model_flag
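# Usage sketch for the returned models (illustrative only): the padded index arrays
# (exp_ids, q_ids, pos_ids, neg*_ids) are hypothetical. Because training_model's output
# *is* the hinge loss, it is trained with an identity loss and dummy targets.
training_model.compile(optimizer='adam', loss=lambda y_true, y_pred: y_pred)
training_model.fit([exp_ids, q_ids, pos_ids, neg1_ids],
                   np.zeros((len(exp_ids), 1)),  # dummy targets, ignored by the identity loss
                   epochs=5, batch_size=32)

# prediction_model reuses the same layer instances, so it sees the trained weights directly
# and scores all four candidate answers at once.
scores = prediction_model.predict([exp_ids, q_ids, pos_ids, neg1_ids, neg2_ids, neg3_ids])

# Wsave holds the initial weights, handy for resetting between runs or cross-validation folds.
training_model.set_weights(Wsave)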
def base(self, inputs, sample_shape=(None,), drop=.2):
    ##### Variable model inputs
    inputs_dic = {}
    for it in inputs:
        inputs_dic[it] = Input(shape=sample_shape, name=str(it))
    self.input_cols = inputs_dic.keys()

    embeddings = Embedding(input_dim=len(self.embeddings_list[0][0]),
                           output_dim=self.embeddings_list[0][1],
                           weights=[self.embeddings_list[0][2]],
                           name='encoder_embeddings')
    embeddings.trainable = self.train_embeds

    ############################################################
    ####### Main encoder
    ############################################################
    encoder_lstm_inputs = None
    if len(self.input_cols) > 1:
        encoder_lstm_inputs = Concatenate()([embeddings(in_val) for in_val in inputs_dic.values()])
    else:
        encoder_lstm_inputs = embeddings(list(inputs_dic.values())[0])

    encoder_lstm = Bidirectional(
        LSTM(
            self.rnn_units,
            activation='relu',
            return_sequences=True,
            return_state=True,
        ))
    encoder_outputs, fh, fc, bh, bc = encoder_lstm(encoder_lstm_inputs)
    encoded_states = [fh, fc, bh, bc]

    ############################################################
    ####### Both decoder layers
    ############################################################
    # Training decoder inputs
    decoder_inputs = Input(shape=(None,), name='training_decoder_input')
    decoder_embeddings = Embedding(input_dim=len(self.embeddings_list[-1][0]),
                                   output_dim=self.embeddings_list[-1][1],
                                   weights=[self.embeddings_list[-1][2]],
                                   name='decoder_embeddings')
    decoder_embeddings.trainable = self.train_embeds
    decoder_data = decoder_embeddings(decoder_inputs)

    # Training decoder layers
    decoder_LSTM = Bidirectional(
        LSTM(
            self.rnn_units,
            activation='relu',
            return_sequences=True,
            return_state=True,
            name='decoder_lstm_relu',
        ))

    ############################################################
    ####### NN1 LSTM output layers
    ############################################################
    # CM LSTM add'l layers
    self.crf = CRF(len(self.embeddings_list[-1][0]), learn_mode='marginal')

    ############################################################
    ####### Both NNs implementation
    ############################################################
    # Training decoder implementation
    decoder_outputs1 = decoder_LSTM(decoder_data, initial_state=encoded_states)
    decs = Dropout(drop)(decoder_outputs1[0])

    ############################################################
    ####### NN1 implementation
    ############################################################
    # NOTE: decoder_outputs2_0 is not defined in this excerpt; it appears to be the output
    # of a second decoder LSTM stack defined elsewhere in the class.
    train_out = self.crf(decoder_outputs2_0[0])
    train_model = Model(list(inputs_dic.values()) + [decoder_inputs], train_out)
    train_model.compile(optimizer='adam',
                        loss=self.crf.loss_function,
                        metrics=[self.crf.accuracy])
    print('decoder_1 (NN1): {}'.format(train_model.metrics_names))
    self.nn1 = train_model

    ############################################################
    ####### Decoder state data
    ############################################################
    #representations_output = Concatenate()(decoder_outputs2_0[1:]+decoder_outputs1[1:])
    representations = Model(
        list(inputs_dic.values()) + [decoder_inputs],
        outputs=decoder_outputs2_0[1:] + decoder_outputs1[1:]
        #decoder_outputs2_0+decoder_outputs1
    )
    self.nn_rep = representations